Skip to content

Commit 24518e6

Browse files
committed
[DAG] Fold extract_subvector(insert_subvector(x,y,c1),c2) --> extract_subvector(y,c2-c1)
If the extract_subvector is cheap and the extracted range lies entirely within the inserted subvector, extract directly from the inserted subvector.
1 parent 298f8f7 commit 24518e6

8 files changed

+994
-1014
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24459,6 +24459,24 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
2445924459
if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
2446024460
return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
2446124461

24462+
// extract_subvector(insert_subvector(x,y,c1),c2)
24463+
// --> extract_subvector(y,c2-c1)
24464+
// iff we're just extracting from the inserted subvector.
24465+
if (V.getOpcode() == ISD::INSERT_SUBVECTOR && !NVT.isScalableVector() &&
24466+
!V.getValueType().isScalableVector()) {
24467+
SDValue InsSub = V.getOperand(1);
24468+
EVT InsSubVT = InsSub.getValueType();
24469+
unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24470+
unsigned InsIdx = V.getConstantOperandVal(2);
24471+
unsigned NumSubElts = NVT.getVectorMinNumElements();
24472+
if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24473+
TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx)) {
24474+
SDLoc DL(N);
24475+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24476+
DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24477+
}
24478+
}
24479+
2446224480
// Try to move vector bitcast after extract_subv by scaling extraction index:
2446324481
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
2446424482
if (V.getOpcode() == ISD::BITCAST &&

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll

Lines changed: 20 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -314,8 +314,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
314314
;
315315
; AVX512F-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
316316
; AVX512F: # %bb.0:
317-
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
318-
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
317+
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
318+
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
319319
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
320320
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
321321
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
@@ -324,8 +324,8 @@ define void @vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2(ptr %in.v
324324
;
325325
; AVX512DQ-LABEL: vec64_i16_widen_to_i32_factor2_broadcast_to_v2i32_factor2:
326326
; AVX512DQ: # %bb.0:
327-
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
328-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
327+
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
328+
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
329329
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,10,11,0,1,14,15,u,u,u,u,u,u,u,u]
330330
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
331331
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
@@ -981,7 +981,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
981981
; AVX512F-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
982982
; AVX512F-NEXT: vmovdqa (%rdi), %ymm1
983983
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
984-
; AVX512F-NEXT: vpermd %zmm1, %zmm0, %zmm0
984+
; AVX512F-NEXT: vpermd %ymm1, %ymm0, %ymm0
985985
; AVX512F-NEXT: vpaddb (%rdx), %ymm0, %ymm0
986986
; AVX512F-NEXT: vmovdqa %ymm0, (%rcx)
987987
; AVX512F-NEXT: vzeroupper
@@ -992,7 +992,7 @@ define void @vec128_i32_widen_to_i64_factor2_broadcast_to_v2i64_factor2(ptr %in.
992992
; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} xmm0 = [0,5,0,7]
993993
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm1
994994
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
995-
; AVX512DQ-NEXT: vpermd %zmm1, %zmm0, %zmm0
995+
; AVX512DQ-NEXT: vpermd %ymm1, %ymm0, %ymm0
996996
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm0, %ymm0
997997
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rcx)
998998
; AVX512DQ-NEXT: vzeroupper
@@ -3507,13 +3507,12 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
35073507
;
35083508
; AVX512F-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
35093509
; AVX512F: # %bb.0:
3510-
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3511-
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3510+
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
35123511
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
35133512
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3514-
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm2
3515-
; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
3513+
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
35163514
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3515+
; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
35173516
; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
35183517
; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
35193518
; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3523,13 +3522,12 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
35233522
;
35243523
; AVX512DQ-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
35253524
; AVX512DQ: # %bb.0:
3526-
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3527-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3525+
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
35283526
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
35293527
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3530-
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm2
3531-
; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
3528+
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
35323529
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3530+
; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
35333531
; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
35343532
; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
35353533
; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3768,10 +3766,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
37683766
;
37693767
; AVX512F-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
37703768
; AVX512F: # %bb.0:
3771-
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3772-
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3769+
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
37733770
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
37743771
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3772+
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
37753773
; AVX512F-NEXT: vpbroadcastq %xmm0, %ymm2
37763774
; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
37773775
; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -3784,10 +3782,10 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
37843782
;
37853783
; AVX512DQ-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
37863784
; AVX512DQ: # %bb.0:
3787-
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
3788-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3785+
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
37893786
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
37903787
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
3788+
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
37913789
; AVX512DQ-NEXT: vpbroadcastq %xmm0, %ymm2
37923790
; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
37933791
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
@@ -4147,9 +4145,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
41474145
;
41484146
; AVX512F-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
41494147
; AVX512F: # %bb.0:
4150-
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
4151-
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
4148+
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
41524149
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
4150+
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
41534151
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
41544152
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
41554153
; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0
@@ -4161,9 +4159,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
41614159
;
41624160
; AVX512DQ-LABEL: vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2:
41634161
; AVX512DQ: # %bb.0:
4164-
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
4165-
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
4162+
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
41664163
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
4164+
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
41674165
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
41684166
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
41694167
; AVX512DQ-NEXT: vpbroadcastw %xmm0, %xmm0

llvm/test/CodeGen/X86/dpbusd.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ define i32 @no_dpbusd(ptr%a, ptr%b, i32 %c, i32 %n) {
2626
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
2727
; AVX512-NEXT: vpmaddwd %ymm0, %ymm1, %ymm0
2828
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
29-
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
29+
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
3030
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
3131
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
3232
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]

llvm/test/CodeGen/X86/dpbusd_i4.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ define i32 @mul_sext_i4i4(<16 x i4> %a, <16 x i4> %b, i32 %c) {
8686
; CHECK-NEXT: vpsraw $12, %ymm0, %ymm0
8787
; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
8888
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
89-
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
89+
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
9090
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
9191
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
9292
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]

llvm/test/CodeGen/X86/insertelement-var-index.ll

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2336,14 +2336,12 @@ define i32 @PR44139(ptr %p) {
23362336
;
23372337
; AVX512-LABEL: PR44139:
23382338
; AVX512: # %bb.0:
2339-
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
2340-
; AVX512-NEXT: vpbroadcastq (%rdi), %zmm1
2341-
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
2342-
; AVX512-NEXT: vpinsrq $1, (%rdi), %xmm1, %xmm2
2343-
; AVX512-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm2
2344-
; AVX512-NEXT: vmovdqa64 %zmm1, 64(%rdi)
2345-
; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi)
2346-
; AVX512-NEXT: vmovd %xmm0, %eax
2339+
; AVX512-NEXT: vpbroadcastq (%rdi), %zmm0
2340+
; AVX512-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm1
2341+
; AVX512-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
2342+
; AVX512-NEXT: vmovdqa64 %zmm0, 64(%rdi)
2343+
; AVX512-NEXT: movl (%rdi), %eax
2344+
; AVX512-NEXT: vmovdqa64 %zmm1, (%rdi)
23472345
; AVX512-NEXT: leal 2147483647(%rax), %ecx
23482346
; AVX512-NEXT: testl %eax, %eax
23492347
; AVX512-NEXT: cmovnsl %eax, %ecx

0 commit comments

Comments
 (0)