Skip to content

Commit 5c7bbe3

Browse files
committed
[X86] canonicalizeShuffleWithOp - recognise constant vectors with getTargetConstantFromNode
Allows the shuffle to fold constant vectors that have already been lowered to the constant pool; shuffle combining can then constant-fold them. Noticed while triaging #79100.
1 parent a369619 commit 5c7bbe3

13 files changed

+2267
-2320
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39638,6 +39638,7 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
3963839638
ISD::isBuildVectorAllZeros(Op.getNode()) ||
3963939639
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3964039640
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
39641+
getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)) ||
3964139642
(Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) ||
3964239643
(isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
3964339644
(FoldLoad && isShuffleFoldableLoad(Op)) ||

llvm/test/CodeGen/X86/i64-to-float.ll

Lines changed: 26 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -257,35 +257,34 @@ define <4 x float> @mask_uitofp_4i64_4f32(<4 x i64> %a) nounwind {
257257
define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
258258
; X86-SSE-LABEL: clamp_sitofp_2i64_2f64:
259259
; X86-SSE: # %bb.0:
260-
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
261-
; X86-SSE-NEXT: movdqa %xmm0, %xmm2
262-
; X86-SSE-NEXT: pxor %xmm1, %xmm2
263-
; X86-SSE-NEXT: movdqa {{.*#+}} xmm3 = [2147483393,4294967295,2147483393,4294967295]
264-
; X86-SSE-NEXT: movdqa %xmm2, %xmm4
265-
; X86-SSE-NEXT: pcmpgtd %xmm3, %xmm4
266-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
267-
; X86-SSE-NEXT: pcmpeqd %xmm3, %xmm2
268-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
269-
; X86-SSE-NEXT: pand %xmm5, %xmm3
270-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
271-
; X86-SSE-NEXT: por %xmm3, %xmm2
272-
; X86-SSE-NEXT: pand %xmm2, %xmm0
273-
; X86-SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
274-
; X86-SSE-NEXT: por %xmm0, %xmm2
260+
; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
261+
; X86-SSE-NEXT: movdqa %xmm0, %xmm1
275262
; X86-SSE-NEXT: pxor %xmm2, %xmm1
276-
; X86-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,0,2147483903,0]
277-
; X86-SSE-NEXT: movdqa %xmm0, %xmm3
278-
; X86-SSE-NEXT: pcmpgtd %xmm1, %xmm3
279-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
280-
; X86-SSE-NEXT: pcmpeqd %xmm0, %xmm1
281-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
282-
; X86-SSE-NEXT: pand %xmm4, %xmm0
283-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
284-
; X86-SSE-NEXT: por %xmm0, %xmm1
285-
; X86-SSE-NEXT: pand %xmm1, %xmm2
263+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
264+
; X86-SSE-NEXT: pcmpeqd %xmm4, %xmm4
265+
; X86-SSE-NEXT: pcmpeqd %xmm3, %xmm4
266+
; X86-SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
267+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
268+
; X86-SSE-NEXT: pand %xmm4, %xmm3
269+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
270+
; X86-SSE-NEXT: por %xmm3, %xmm1
271+
; X86-SSE-NEXT: pand %xmm1, %xmm0
286272
; X86-SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
287-
; X86-SSE-NEXT: por %xmm2, %xmm1
288-
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
273+
; X86-SSE-NEXT: por %xmm0, %xmm1
274+
; X86-SSE-NEXT: pxor %xmm1, %xmm2
275+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
276+
; X86-SSE-NEXT: pxor %xmm3, %xmm3
277+
; X86-SSE-NEXT: pcmpeqd %xmm0, %xmm3
278+
; X86-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,0,2147483903,0]
279+
; X86-SSE-NEXT: pcmpgtd %xmm2, %xmm0
280+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
281+
; X86-SSE-NEXT: pand %xmm3, %xmm2
282+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
283+
; X86-SSE-NEXT: por %xmm2, %xmm0
284+
; X86-SSE-NEXT: pand %xmm0, %xmm1
285+
; X86-SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
286+
; X86-SSE-NEXT: por %xmm1, %xmm0
287+
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
289288
; X86-SSE-NEXT: cvtdq2pd %xmm0, %xmm0
290289
; X86-SSE-NEXT: retl
291290
;

llvm/test/CodeGen/X86/packus.ll

Lines changed: 22 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -118,45 +118,25 @@ define <8 x i16> @trunc_lshr_v8i32(<8 x i32> %a) nounwind {
118118
}
119119

120120
define <8 x i16> @trunc_lshr_v4i64_demandedelts(<4 x i64> %a0) {
121-
; X86-SSE2-LABEL: trunc_lshr_v4i64_demandedelts:
122-
; X86-SSE2: # %bb.0:
123-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
124-
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
125-
; X86-SSE2-NEXT: pand %xmm2, %xmm1
126-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
127-
; X86-SSE2-NEXT: pand %xmm2, %xmm0
128-
; X86-SSE2-NEXT: packuswb %xmm1, %xmm0
129-
; X86-SSE2-NEXT: retl
130-
;
131-
; X64-SSE2-LABEL: trunc_lshr_v4i64_demandedelts:
132-
; X64-SSE2: # %bb.0:
133-
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,18446744073709551615]
134-
; X64-SSE2-NEXT: pand %xmm2, %xmm0
135-
; X64-SSE2-NEXT: pand %xmm2, %xmm1
136-
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
137-
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
138-
; X64-SSE2-NEXT: packuswb %xmm1, %xmm0
139-
; X64-SSE2-NEXT: retq
140-
;
141-
; X86-SSE4-LABEL: trunc_lshr_v4i64_demandedelts:
142-
; X86-SSE4: # %bb.0:
143-
; X86-SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
144-
; X86-SSE4-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
145-
; X86-SSE4-NEXT: pand %xmm2, %xmm1
146-
; X86-SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
147-
; X86-SSE4-NEXT: pand %xmm2, %xmm0
148-
; X86-SSE4-NEXT: packusdw %xmm1, %xmm0
149-
; X86-SSE4-NEXT: retl
121+
; SSE2-LABEL: trunc_lshr_v4i64_demandedelts:
122+
; SSE2: # %bb.0:
123+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
124+
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
125+
; SSE2-NEXT: pand %xmm2, %xmm1
126+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
127+
; SSE2-NEXT: pand %xmm2, %xmm0
128+
; SSE2-NEXT: packuswb %xmm1, %xmm0
129+
; SSE2-NEXT: ret{{[l|q]}}
150130
;
151-
; X64-SSE4-LABEL: trunc_lshr_v4i64_demandedelts:
152-
; X64-SSE4: # %bb.0:
153-
; X64-SSE4-NEXT: movdqa {{.*#+}} xmm2 = [1,18446744073709551615]
154-
; X64-SSE4-NEXT: pand %xmm2, %xmm0
155-
; X64-SSE4-NEXT: pand %xmm2, %xmm1
156-
; X64-SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
157-
; X64-SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
158-
; X64-SSE4-NEXT: packusdw %xmm1, %xmm0
159-
; X64-SSE4-NEXT: retq
131+
; SSE4-LABEL: trunc_lshr_v4i64_demandedelts:
132+
; SSE4: # %bb.0:
133+
; SSE4-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
134+
; SSE4-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
135+
; SSE4-NEXT: pand %xmm2, %xmm1
136+
; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
137+
; SSE4-NEXT: pand %xmm2, %xmm0
138+
; SSE4-NEXT: packusdw %xmm1, %xmm0
139+
; SSE4-NEXT: ret{{[l|q]}}
160140
;
161141
; X86-AVX1-LABEL: trunc_lshr_v4i64_demandedelts:
162142
; X86-AVX1: # %bb.0:
@@ -467,4 +447,8 @@ define <32 x i8> @packuswb_icmp_zero_trunc_256(<16 x i16> %a0) {
467447
}
468448
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
469449
; X64-AVX2: {{.*}}
450+
; X64-SSE2: {{.*}}
451+
; X64-SSE4: {{.*}}
470452
; X86-AVX2: {{.*}}
453+
; X86-SSE2: {{.*}}
454+
; X86-SSE4: {{.*}}

llvm/test/CodeGen/X86/sext-vsetcc.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -283,10 +283,10 @@ define <4 x i32> @cmp_slt_load_const(ptr %x) nounwind {
283283
; SSE-LABEL: cmp_slt_load_const:
284284
; SSE: # %bb.0:
285285
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
286-
; SSE-NEXT: movdqa {{.*#+}} xmm0 = <42,214,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
286+
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
287+
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
288+
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [10794,10794,54998,54998,0,0,65535,65535]
287289
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
288-
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
289-
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
290290
; SSE-NEXT: retq
291291
;
292292
; AVX-LABEL: cmp_slt_load_const:

llvm/test/CodeGen/X86/test-shrink-bug.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,8 +67,8 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
6767
; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
6868
; CHECK-X64-NEXT: je .LBB1_3
6969
; CHECK-X64-NEXT: # %bb.1:
70+
; CHECK-X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
7071
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
71-
; CHECK-X64-NEXT: pslld $8, %xmm0
7272
; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
7373
; CHECK-X64-NEXT: testb $1, %al
7474
; CHECK-X64-NEXT: jne .LBB1_3

0 commit comments

Comments
 (0)