Skip to content

Commit 65e86a8

Browse files
committed
[X86] combineConcatVectorOps - concat(bitcast(),bitcast()) -> bitcast(concat())
When concatenating subvector operations together, try to use the original (pre-bitcast) sub-operands to help expose further combines.
1 parent 0284b4b commit 65e86a8

9 files changed

+8544
-8627
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56165,6 +56165,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5616556165
SmallVector<SDValue> Subs;
5616656166
for (SDValue SubOp : SubOps)
5616756167
Subs.push_back(SubOp.getOperand(I));
56168+
// Attempt to peek through bitcasts and concat the original subvectors.
56169+
EVT SubVT = peekThroughBitcasts(Subs[0]).getValueType();
56170+
if (SubVT.isSimple() && SubVT.isVector()) {
56171+
EVT ConcatVT =
56172+
EVT::getVectorVT(*DAG.getContext(), SubVT.getScalarType(),
56173+
SubVT.getVectorElementCount() * Subs.size());
56174+
for (SDValue &Sub : Subs)
56175+
Sub = DAG.getBitcast(SubVT, Sub);
56176+
return DAG.getBitcast(
56177+
VT, DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Subs));
56178+
}
5616856179
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
5616956180
};
5617056181
auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {

llvm/test/CodeGen/X86/pmul.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -964,7 +964,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind {
964964
; AVX512F-LABEL: mul_v64i8:
965965
; AVX512F: # %bb.0: # %entry
966966
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
967-
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
967+
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
968968
; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm4
969969
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
970970
; AVX512F-NEXT: vpmaddubsw %ymm4, %ymm5, %ymm4

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-6.ll

Lines changed: 1962 additions & 2036 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll

Lines changed: 3540 additions & 3546 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll

Lines changed: 256 additions & 260 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll

Lines changed: 910 additions & 980 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll

Lines changed: 1148 additions & 1240 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll

Lines changed: 646 additions & 442 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/X86/widen_bitcnt.ll

Lines changed: 70 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -629,71 +629,45 @@ define <8 x i32> @widen_ctlz_v2i32_v8i32(<2 x i32> %a0, <2 x i32> %a1, <2 x i32>
629629
; AVX2: # %bb.0:
630630
; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
631631
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
632-
; AVX2-NEXT: vmovq {{.*#+}} xmm5 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
633-
; AVX2-NEXT: vpshufb %xmm0, %xmm5, %xmm4
634-
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm6
635-
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm7 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
636-
; AVX2-NEXT: vpand %xmm7, %xmm6, %xmm8
637-
; AVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6
638-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm8, %xmm9
639-
; AVX2-NEXT: vpand %xmm4, %xmm9, %xmm4
640-
; AVX2-NEXT: vpshufb %xmm8, %xmm5, %xmm8
641-
; AVX2-NEXT: vpaddb %xmm4, %xmm8, %xmm4
642-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm8
643-
; AVX2-NEXT: vpsrlw $8, %xmm8, %xmm8
644-
; AVX2-NEXT: vpand %xmm4, %xmm8, %xmm8
645-
; AVX2-NEXT: vpsrlw $8, %xmm4, %xmm4
646-
; AVX2-NEXT: vpaddw %xmm4, %xmm8, %xmm4
647-
; AVX2-NEXT: vpshufb %xmm1, %xmm5, %xmm8
648-
; AVX2-NEXT: vpsrlw $4, %xmm1, %xmm9
649-
; AVX2-NEXT: vpand %xmm7, %xmm9, %xmm9
650-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm9, %xmm10
651-
; AVX2-NEXT: vpand %xmm10, %xmm8, %xmm8
652-
; AVX2-NEXT: vpshufb %xmm9, %xmm5, %xmm9
653-
; AVX2-NEXT: vpaddb %xmm9, %xmm8, %xmm8
654-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm9
655-
; AVX2-NEXT: vpsrlw $8, %xmm9, %xmm9
656-
; AVX2-NEXT: vpand %xmm9, %xmm8, %xmm9
657-
; AVX2-NEXT: vpsrlw $8, %xmm8, %xmm8
658-
; AVX2-NEXT: vpaddw %xmm9, %xmm8, %xmm8
659-
; AVX2-NEXT: vpshufb %xmm2, %xmm5, %xmm9
660-
; AVX2-NEXT: vpsrlw $4, %xmm2, %xmm10
661-
; AVX2-NEXT: vpand %xmm7, %xmm10, %xmm10
662-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm10, %xmm11
663-
; AVX2-NEXT: vpand %xmm11, %xmm9, %xmm9
664-
; AVX2-NEXT: vpshufb %xmm10, %xmm5, %xmm10
665-
; AVX2-NEXT: vpaddb %xmm10, %xmm9, %xmm9
666-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm10
667-
; AVX2-NEXT: vpsrlw $8, %xmm10, %xmm10
668-
; AVX2-NEXT: vpand %xmm10, %xmm9, %xmm10
669-
; AVX2-NEXT: vpsrlw $8, %xmm9, %xmm9
670-
; AVX2-NEXT: vpaddw %xmm10, %xmm9, %xmm9
671-
; AVX2-NEXT: vpshufb %xmm3, %xmm5, %xmm10
672-
; AVX2-NEXT: vpsrlw $4, %xmm3, %xmm11
673-
; AVX2-NEXT: vpand %xmm7, %xmm11, %xmm7
674-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm7, %xmm11
675-
; AVX2-NEXT: vpand %xmm11, %xmm10, %xmm10
676-
; AVX2-NEXT: vpshufb %xmm7, %xmm5, %xmm5
677-
; AVX2-NEXT: vpaddb %xmm5, %xmm10, %xmm5
678-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm3, %xmm6
679-
; AVX2-NEXT: vpsrlw $8, %xmm6, %xmm6
680-
; AVX2-NEXT: vpand %xmm6, %xmm5, %xmm6
681-
; AVX2-NEXT: vpsrlw $8, %xmm5, %xmm5
682-
; AVX2-NEXT: vpaddw %xmm6, %xmm5, %xmm5
683-
; AVX2-NEXT: vinserti128 $1, %xmm5, %ymm8, %ymm5
684632
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
685-
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
686-
; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
633+
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
634+
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
635+
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm4
636+
; AVX2-NEXT: vpsrlw $4, %ymm1, %ymm5
637+
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
638+
; AVX2-NEXT: vpand %ymm6, %ymm5, %ymm5
639+
; AVX2-NEXT: vpxor %xmm7, %xmm7, %xmm7
640+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm5, %ymm8
641+
; AVX2-NEXT: vpand %ymm4, %ymm8, %ymm4
642+
; AVX2-NEXT: vpshufb %ymm5, %ymm3, %ymm5
643+
; AVX2-NEXT: vpaddb %ymm5, %ymm4, %ymm4
644+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm1, %ymm5
645+
; AVX2-NEXT: vpsrlw $8, %ymm5, %ymm5
646+
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm5
647+
; AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4
648+
; AVX2-NEXT: vpaddw %ymm5, %ymm4, %ymm4
649+
; AVX2-NEXT: vpcmpeqw %ymm7, %ymm1, %ymm1
687650
; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
688-
; AVX2-NEXT: vpand %ymm1, %ymm5, %ymm1
689-
; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5
690-
; AVX2-NEXT: vpaddd %ymm1, %ymm5, %ymm1
691-
; AVX2-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm4
651+
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
652+
; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
653+
; AVX2-NEXT: vpaddd %ymm1, %ymm4, %ymm1
692654
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
693-
; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
655+
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm2
656+
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
657+
; AVX2-NEXT: vpand %ymm6, %ymm4, %ymm4
658+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm4, %ymm5
659+
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
660+
; AVX2-NEXT: vpshufb %ymm4, %ymm3, %ymm3
661+
; AVX2-NEXT: vpaddb %ymm3, %ymm2, %ymm2
662+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm0, %ymm3
663+
; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
664+
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm3
665+
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
666+
; AVX2-NEXT: vpaddw %ymm3, %ymm2, %ymm2
667+
; AVX2-NEXT: vpcmpeqw %ymm7, %ymm0, %ymm0
694668
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
695-
; AVX2-NEXT: vpand %ymm0, %ymm4, %ymm0
696-
; AVX2-NEXT: vpsrld $16, %ymm4, %ymm2
669+
; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0
670+
; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
697671
; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
698672
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
699673
; AVX2-NEXT: retq
@@ -1008,71 +982,45 @@ define <8 x i32> @widen_ctlz_undef_v2i32_v8i32(<2 x i32> %a0, <2 x i32> %a1, <2
1008982
; AVX2: # %bb.0:
1009983
; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
1010984
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1011-
; AVX2-NEXT: vmovq {{.*#+}} xmm5 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
1012-
; AVX2-NEXT: vpshufb %xmm0, %xmm5, %xmm4
1013-
; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm6
1014-
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm7 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1015-
; AVX2-NEXT: vpand %xmm7, %xmm6, %xmm8
1016-
; AVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6
1017-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm8, %xmm9
1018-
; AVX2-NEXT: vpand %xmm4, %xmm9, %xmm4
1019-
; AVX2-NEXT: vpshufb %xmm8, %xmm5, %xmm8
1020-
; AVX2-NEXT: vpaddb %xmm4, %xmm8, %xmm4
1021-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm0, %xmm8
1022-
; AVX2-NEXT: vpsrlw $8, %xmm8, %xmm8
1023-
; AVX2-NEXT: vpand %xmm4, %xmm8, %xmm8
1024-
; AVX2-NEXT: vpsrlw $8, %xmm4, %xmm4
1025-
; AVX2-NEXT: vpaddw %xmm4, %xmm8, %xmm4
1026-
; AVX2-NEXT: vpshufb %xmm1, %xmm5, %xmm8
1027-
; AVX2-NEXT: vpsrlw $4, %xmm1, %xmm9
1028-
; AVX2-NEXT: vpand %xmm7, %xmm9, %xmm9
1029-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm9, %xmm10
1030-
; AVX2-NEXT: vpand %xmm10, %xmm8, %xmm8
1031-
; AVX2-NEXT: vpshufb %xmm9, %xmm5, %xmm9
1032-
; AVX2-NEXT: vpaddb %xmm9, %xmm8, %xmm8
1033-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm1, %xmm9
1034-
; AVX2-NEXT: vpsrlw $8, %xmm9, %xmm9
1035-
; AVX2-NEXT: vpand %xmm9, %xmm8, %xmm9
1036-
; AVX2-NEXT: vpsrlw $8, %xmm8, %xmm8
1037-
; AVX2-NEXT: vpaddw %xmm9, %xmm8, %xmm8
1038-
; AVX2-NEXT: vpshufb %xmm2, %xmm5, %xmm9
1039-
; AVX2-NEXT: vpsrlw $4, %xmm2, %xmm10
1040-
; AVX2-NEXT: vpand %xmm7, %xmm10, %xmm10
1041-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm10, %xmm11
1042-
; AVX2-NEXT: vpand %xmm11, %xmm9, %xmm9
1043-
; AVX2-NEXT: vpshufb %xmm10, %xmm5, %xmm10
1044-
; AVX2-NEXT: vpaddb %xmm10, %xmm9, %xmm9
1045-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm2, %xmm10
1046-
; AVX2-NEXT: vpsrlw $8, %xmm10, %xmm10
1047-
; AVX2-NEXT: vpand %xmm10, %xmm9, %xmm10
1048-
; AVX2-NEXT: vpsrlw $8, %xmm9, %xmm9
1049-
; AVX2-NEXT: vpaddw %xmm10, %xmm9, %xmm9
1050-
; AVX2-NEXT: vpshufb %xmm3, %xmm5, %xmm10
1051-
; AVX2-NEXT: vpsrlw $4, %xmm3, %xmm11
1052-
; AVX2-NEXT: vpand %xmm7, %xmm11, %xmm7
1053-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm7, %xmm11
1054-
; AVX2-NEXT: vpand %xmm11, %xmm10, %xmm10
1055-
; AVX2-NEXT: vpshufb %xmm7, %xmm5, %xmm5
1056-
; AVX2-NEXT: vpaddb %xmm5, %xmm10, %xmm5
1057-
; AVX2-NEXT: vpcmpeqb %xmm6, %xmm3, %xmm6
1058-
; AVX2-NEXT: vpsrlw $8, %xmm6, %xmm6
1059-
; AVX2-NEXT: vpand %xmm6, %xmm5, %xmm6
1060-
; AVX2-NEXT: vpsrlw $8, %xmm5, %xmm5
1061-
; AVX2-NEXT: vpaddw %xmm6, %xmm5, %xmm5
1062-
; AVX2-NEXT: vinserti128 $1, %xmm5, %ymm8, %ymm5
1063985
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
1064-
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1065-
; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
986+
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
987+
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
988+
; AVX2-NEXT: vpshufb %ymm1, %ymm3, %ymm4
989+
; AVX2-NEXT: vpsrlw $4, %ymm1, %ymm5
990+
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
991+
; AVX2-NEXT: vpand %ymm6, %ymm5, %ymm5
992+
; AVX2-NEXT: vpxor %xmm7, %xmm7, %xmm7
993+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm5, %ymm8
994+
; AVX2-NEXT: vpand %ymm4, %ymm8, %ymm4
995+
; AVX2-NEXT: vpshufb %ymm5, %ymm3, %ymm5
996+
; AVX2-NEXT: vpaddb %ymm5, %ymm4, %ymm4
997+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm1, %ymm5
998+
; AVX2-NEXT: vpsrlw $8, %ymm5, %ymm5
999+
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm5
1000+
; AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4
1001+
; AVX2-NEXT: vpaddw %ymm5, %ymm4, %ymm4
1002+
; AVX2-NEXT: vpcmpeqw %ymm7, %ymm1, %ymm1
10661003
; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
1067-
; AVX2-NEXT: vpand %ymm1, %ymm5, %ymm1
1068-
; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5
1069-
; AVX2-NEXT: vpaddd %ymm1, %ymm5, %ymm1
1070-
; AVX2-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm4
1004+
; AVX2-NEXT: vpand %ymm1, %ymm4, %ymm1
1005+
; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
1006+
; AVX2-NEXT: vpaddd %ymm1, %ymm4, %ymm1
10711007
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1072-
; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0
1008+
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm2
1009+
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
1010+
; AVX2-NEXT: vpand %ymm6, %ymm4, %ymm4
1011+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm4, %ymm5
1012+
; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
1013+
; AVX2-NEXT: vpshufb %ymm4, %ymm3, %ymm3
1014+
; AVX2-NEXT: vpaddb %ymm3, %ymm2, %ymm2
1015+
; AVX2-NEXT: vpcmpeqb %ymm7, %ymm0, %ymm3
1016+
; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3
1017+
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm3
1018+
; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
1019+
; AVX2-NEXT: vpaddw %ymm3, %ymm2, %ymm2
1020+
; AVX2-NEXT: vpcmpeqw %ymm7, %ymm0, %ymm0
10731021
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
1074-
; AVX2-NEXT: vpand %ymm0, %ymm4, %ymm0
1075-
; AVX2-NEXT: vpsrld $16, %ymm4, %ymm2
1022+
; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0
1023+
; AVX2-NEXT: vpsrld $16, %ymm2, %ymm2
10761024
; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
10771025
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
10781026
; AVX2-NEXT: retq

0 commit comments

Comments
 (0)