Skip to content

Commit d290456

Browse files
authored
[X86] combineConcatVectorOps - fold concat(EXTEND_VECTOR_INREG(x),EXTEND_VECTOR_INREG(y)) -> EXTEND_VECTOR_INREG(unpack(x,y)) (#127502)
Concat/unpack the src subvectors together in the bottom 128-bit vector and then extend with a single EXTEND/EXTEND_VECTOR_INREG instruction. This required the getEXTEND_VECTOR_INREG helper to be tweaked to accept EXTEND_VECTOR_INREG opcodes as well, to avoid having to remap the opcode between both types.
1 parent c5ea469 commit d290456

File tree

3 files changed

+1885
-2350
lines changed

3 files changed

+1885
-2350
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 42 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4679,9 +4679,24 @@ static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,
46794679
SDValue In, SelectionDAG &DAG) {
46804680
EVT InVT = In.getValueType();
46814681
assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
4682-
assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
4683-
ISD::ZERO_EXTEND == Opcode) &&
4684-
"Unknown extension opcode");
4682+
4683+
// Canonicalize Opcode to general extension version.
4684+
switch (Opcode) {
4685+
case ISD::ANY_EXTEND:
4686+
case ISD::ANY_EXTEND_VECTOR_INREG:
4687+
Opcode = ISD::ANY_EXTEND;
4688+
break;
4689+
case ISD::SIGN_EXTEND:
4690+
case ISD::SIGN_EXTEND_VECTOR_INREG:
4691+
Opcode = ISD::SIGN_EXTEND;
4692+
break;
4693+
case ISD::ZERO_EXTEND:
4694+
case ISD::ZERO_EXTEND_VECTOR_INREG:
4695+
Opcode = ISD::ZERO_EXTEND;
4696+
break;
4697+
default:
4698+
llvm_unreachable("Unknown extension opcode");
4699+
}
46854700

46864701
// For 256-bit vectors, we only need the lower (128-bit) input half.
46874702
// For 512-bit vectors, we only need the lower input half or quarter.
@@ -57864,6 +57879,30 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5786457879
}
5786557880
}
5786657881
break;
57882+
case ISD::ANY_EXTEND_VECTOR_INREG:
57883+
case ISD::SIGN_EXTEND_VECTOR_INREG:
57884+
case ISD::ZERO_EXTEND_VECTOR_INREG: {
57885+
// TODO: Handle ANY_EXTEND combos with SIGN/ZERO_EXTEND.
57886+
if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
57887+
Subtarget.hasInt256() &&
57888+
Op0.getOperand(0).getValueType().is128BitVector() &&
57889+
Op0.getOperand(0).getValueType() ==
57890+
Ops[0].getOperand(0).getValueType()) {
57891+
EVT SrcVT = Op0.getOperand(0).getValueType();
57892+
unsigned NumElts = VT.getVectorNumElements();
57893+
MVT UnpackSVT =
57894+
MVT::getIntegerVT(SrcVT.getScalarSizeInBits() * (NumElts / 2));
57895+
MVT UnpackVT =
57896+
MVT::getVectorVT(UnpackSVT, 128 / UnpackSVT.getScalarSizeInBits());
57897+
SDValue Unpack =
57898+
DAG.getNode(X86ISD::UNPCKL, DL, UnpackVT,
57899+
DAG.getBitcast(UnpackVT, Ops[0].getOperand(0)),
57900+
DAG.getBitcast(UnpackVT, Ops[1].getOperand(0)));
57901+
return getEXTEND_VECTOR_INREG(Op0.getOpcode(), DL, VT,
57902+
DAG.getBitcast(SrcVT, Unpack), DAG);
57903+
}
57904+
break;
57905+
}
5786757906
case X86ISD::VSHLI:
5786857907
case X86ISD::VSRLI:
5786957908
// Special case: SHL/SRL AVX1 V4i64 by 32-bits can lower as a shuffle.

0 commit comments

Comments
 (0)