Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 819a6b7

Browse files
committed
[AArch64] Generalize extract-high DUP extension to MOVI/MVNI.
These are really immediate DUPs, and suffer from the same problem with long instructions with a high/2 variant (e.g. smull).

By extending a MOVI (or DUP, before this patch), we can avoid an ext on the other operand of the long instruction, e.g. turning:
    ext.16b v0, v0, v0, #8
    movi.4h v1, #0x53
    smull.4s v0, v0, v1
into:
    movi.8h v1, #0x53
    smull2.4s v0, v0, v1

While there, add a now-necessary combine to fold (VT NVCAST (VT x)).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239799 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent d4521f1 commit 819a6b7

File tree

2 files changed

+234
-15
lines changed

2 files changed

+234
-15
lines changed

lib/Target/AArch64/AArch64ISelLowering.cpp

+24-15
Original file line numberDiff line numberDiff line change
@@ -7580,21 +7580,26 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
75807580
//
75817581
// This routine does the actual conversion of such DUPs, once outer routines
75827582
// have determined that everything else is in order.
7583+
// It also supports immediate DUP-like nodes (MOVI/MVNI), which we can fold
7584+
// similarly here.
75837585
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
7584-
// We can handle most types of duplicate, but the lane ones have an extra
7585-
// operand saying *which* lane, so we need to know.
7586-
bool IsDUPLANE;
75877586
switch (N.getOpcode()) {
75887587
case AArch64ISD::DUP:
7589-
IsDUPLANE = false;
7590-
break;
75917588
case AArch64ISD::DUPLANE8:
75927589
case AArch64ISD::DUPLANE16:
75937590
case AArch64ISD::DUPLANE32:
75947591
case AArch64ISD::DUPLANE64:
7595-
IsDUPLANE = true;
7592+
case AArch64ISD::MOVI:
7593+
case AArch64ISD::MOVIshift:
7594+
case AArch64ISD::MOVIedit:
7595+
case AArch64ISD::MOVImsl:
7596+
case AArch64ISD::MVNIshift:
7597+
case AArch64ISD::MVNImsl:
75967598
break;
75977599
default:
7600+
// FMOV could be supported, but isn't very useful, as it would only occur
7601+
// if you passed a bitcast'ed floating point immediate to an eligible long
7602+
// integer op (addl, smull, ...).
75987603
return SDValue();
75997604
}
76007605

@@ -7604,17 +7609,11 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
76047609

76057610
MVT ElementTy = NarrowTy.getVectorElementType();
76067611
unsigned NumElems = NarrowTy.getVectorNumElements();
7607-
MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
7612+
MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
76087613

76097614
SDLoc dl(N);
7610-
SDValue NewDUP;
7611-
if (IsDUPLANE)
7612-
NewDUP = DAG.getNode(N.getOpcode(), dl, NewDUPVT, N.getOperand(0),
7613-
N.getOperand(1));
7614-
else
7615-
NewDUP = DAG.getNode(AArch64ISD::DUP, dl, NewDUPVT, N.getOperand(0));
7616-
7617-
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy, NewDUP,
7615+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
7616+
DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
76187617
DAG.getConstant(NumElems, dl, MVT::i64));
76197618
}
76207619

@@ -8913,6 +8912,14 @@ static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
89138912
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
89148913
}
89158914

8915+
/// Get rid of unnecessary NVCASTs (that don't change the type).
8916+
static SDValue performNVCASTCombine(SDNode *N) {
8917+
if (N->getValueType(0) == N->getOperand(0).getValueType())
8918+
return N->getOperand(0);
8919+
8920+
return SDValue();
8921+
}
8922+
89168923
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
89178924
DAGCombinerInfo &DCI) const {
89188925
SelectionDAG &DAG = DCI.DAG;
@@ -8955,6 +8962,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
89558962
return performCONDCombine(N, DCI, DAG, 2, 3);
89568963
case AArch64ISD::DUP:
89578964
return performPostLD1Combine(N, DCI, false);
8965+
case AArch64ISD::NVCAST:
8966+
return performNVCASTCombine(N);
89588967
case ISD::INSERT_VECTOR_ELT:
89598968
return performPostLD1Combine(N, DCI, true);
89608969
case ISD::INTRINSIC_VOID:

0 commit comments

Comments
 (0)