Skip to content

Commit 10fc06a

Browse files
authored
[X86] canonicalizeShuffleWithOp - add handling for X86ISD::VPERMV nodes (#127625)
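Handle the different src/mask operand ordering of X86ISD::VPERMV nodes: VPERMV takes the shuffle mask as operand 0 and the source vector as operand 1, whereas the other shuffle opcodes in this case group take the source as operand 0.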
1 parent 31abb20 commit 10fc06a

File tree

3 files changed

+19
-12
lines changed

3 files changed

+19
-12
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -41629,23 +41629,28 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4162941629
case X86ISD::PSHUFD:
4163041630
case X86ISD::PSHUFHW:
4163141631
case X86ISD::PSHUFLW:
41632+
case X86ISD::VPERMV:
4163241633
case X86ISD::VPERMI:
4163341634
case X86ISD::VPERMILPI: {
41634-
if (N.getOperand(0).getValueType() == ShuffleVT &&
41635-
N->isOnlyUserOf(N.getOperand(0).getNode())) {
41636-
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
41635+
unsigned SrcIdx = Opc == X86ISD::VPERMV ? 1 : 0;
41636+
if (N.getOperand(SrcIdx).getValueType() == ShuffleVT &&
41637+
N->isOnlyUserOf(N.getOperand(SrcIdx).getNode())) {
41638+
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(SrcIdx));
4163741639
unsigned SrcOpcode = N0.getOpcode();
4163841640
EVT OpVT = N0.getValueType();
4163941641
if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
4164041642
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
4164141643
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
41642-
bool FoldShuf = Opc != X86ISD::VPERMI;
41644+
bool FoldShuf = Opc != X86ISD::VPERMI && Opc != X86ISD::VPERMV;
4164341645
if (IsMergeableWithShuffle(Op00, FoldShuf) ||
4164441646
IsMergeableWithShuffle(Op01, FoldShuf)) {
4164541647
SDValue LHS, RHS;
4164641648
Op00 = DAG.getBitcast(ShuffleVT, Op00);
4164741649
Op01 = DAG.getBitcast(ShuffleVT, Op01);
41648-
if (N.getNumOperands() == 2) {
41650+
if (Opc == X86ISD::VPERMV) {
41651+
LHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op00);
41652+
RHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op01);
41653+
} else if (N.getNumOperands() == 2) {
4164941654
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
4165041655
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
4165141656
} else {
@@ -41661,11 +41666,13 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
4166141666
if (SrcOpcode == ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
4166241667
OpVT.getScalarSizeInBits() ==
4166341668
N0.getOperand(0).getScalarValueSizeInBits()) {
41664-
SDValue Op00 = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
41665-
SDValue Res =
41666-
N.getNumOperands() == 2
41667-
? DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1))
41668-
: DAG.getNode(Opc, DL, ShuffleVT, Op00);
41669+
SDValue Res = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
41670+
if (Opc == X86ISD::VPERMV)
41671+
Res = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Res);
41672+
else if (N.getNumOperands() == 2)
41673+
Res = DAG.getNode(Opc, DL, ShuffleVT, Res, N.getOperand(1));
41674+
else
41675+
Res = DAG.getNode(Opc, DL, ShuffleVT, Res);
4166941676
Res = DAG.getBitcast(N0.getOperand(0).getValueType(), Res);
4167041677
return DAG.getBitcast(ShuffleVT, DAG.getNode(SrcOpcode, DL, OpVT, Res));
4167141678
}

llvm/test/CodeGen/X86/vector-partial-undef.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -151,9 +151,9 @@ define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
151151
; AVX: # %bb.0:
152152
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
153153
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
154-
; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
155154
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
156155
; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
156+
; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
157157
; AVX-NEXT: retq
158158
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef>
159159
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>

llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2469,10 +2469,10 @@ define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
24692469
;
24702470
; AVX2-FAST-ALL-LABEL: combine_unneeded_subvector1:
24712471
; AVX2-FAST-ALL: # %bb.0:
2472-
; AVX2-FAST-ALL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
24732472
; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
24742473
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
24752474
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
2475+
; AVX2-FAST-ALL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
24762476
; AVX2-FAST-ALL-NEXT: retq
24772477
;
24782478
; AVX2-FAST-PERLANE-LABEL: combine_unneeded_subvector1:

0 commit comments

Comments
 (0)