Skip to content

Commit 7980f5b

Browse files
committed
[DAGCombiner] Treat extracts from build_vectors that are splats as free
When scalarizing binary ops of splats, we treat the extract as free for a splat_vector because its operand is already a scalar, i.e. (extract idx, (splat_vector x)) -> x. The same also applies to a build_vector that is a splat: (extract idx, (build_vector x x x x)) -> x. This patch takes this into account, which enables scalarization for fixed-length vectors, since the current canonical form for a splatted fixed-length vector is still build_vector. This improves what we were seeing on RISC-V in #65068, but unfortunately causes some patterns to be missed on other targets. One significant case is that on AArch64 and X86, scalarizing (xor (splat x), (splat -1)) to (splat (xor x, -1)) prevents vnot from being matched, which in turn prevents, for example, bif from being matched. Posting this patch as a WIP to show my findings.
1 parent c74b162 commit 7980f5b

34 files changed

+780
-514
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26196,20 +26196,20 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
2619626196
EVT EltVT = VT.getVectorElementType();
2619726197
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2619826198

26199-
// TODO: Remove/replace the extract cost check? If the elements are available
26200-
// as scalars, then there may be no extract cost. Should we ask if
26201-
// inserting a scalar back into a vector is cheap instead?
2620226199
int Index0, Index1;
2620326200
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
2620426201
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26205-
// Extract element from splat_vector should be free.
26206-
// TODO: use DAG.isSplatValue instead?
26207-
bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26208-
N1.getOpcode() == ISD::SPLAT_VECTOR;
26202+
// Extracting from a shuffle_vector might cost something, but extracting from
26203+
// a splat_vector or a splatted build_vector should be free since the operands
26204+
// are scalars anyway.
26205+
bool IsExtractFree = (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26206+
N0.getOpcode() == ISD::BUILD_VECTOR) &&
26207+
(N1.getOpcode() == ISD::SPLAT_VECTOR ||
26208+
N1.getOpcode() == ISD::BUILD_VECTOR);
2620926209
if (!Src0 || !Src1 || Index0 != Index1 ||
2621026210
Src0.getValueType().getVectorElementType() != EltVT ||
2621126211
Src1.getValueType().getVectorElementType() != EltVT ||
26212-
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26212+
!(IsExtractFree || TLI.isExtractVecEltCheap(VT, Index0)) ||
2621326213
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
2621426214
return SDValue();
2621526215

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -473,16 +473,16 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
473473
define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
474474
; CHECK-LABEL: lane_mask_v2i1_i8:
475475
; CHECK: // %bb.0:
476-
; CHECK-NEXT: movi d0, #0x0000ff000000ff
477-
; CHECK-NEXT: dup v1.2s, w0
476+
; CHECK-NEXT: and w8, w0, #0xff
477+
; CHECK-NEXT: movi d2, #0x0000ff000000ff
478+
; CHECK-NEXT: dup v0.2s, w8
478479
; CHECK-NEXT: adrp x8, .LCPI27_0
479-
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI27_0]
480-
; CHECK-NEXT: dup v3.2s, w1
481-
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
482-
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
483-
; CHECK-NEXT: umin v1.2s, v1.2s, v0.2s
484-
; CHECK-NEXT: and v0.8b, v3.8b, v0.8b
485-
; CHECK-NEXT: cmhi v0.2s, v0.2s, v1.2s
480+
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0]
481+
; CHECK-NEXT: and w8, w1, #0xff
482+
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
483+
; CHECK-NEXT: dup v1.2s, w8
484+
; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s
485+
; CHECK-NEXT: cmhi v0.2s, v1.2s, v0.2s
486486
; CHECK-NEXT: ret
487487
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
488488
ret <2 x i1> %active.lane.mask

llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,12 @@ define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b
267267
; CHECK: // %bb.0:
268268
; CHECK-NEXT: tst w0, #0x1
269269
; CHECK-NEXT: csetm w8, ne
270+
; CHECK-NEXT: mvn w9, w8
270271
; CHECK-NEXT: dup v2.2s, w8
271-
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
272+
; CHECK-NEXT: dup v3.2s, w9
273+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
274+
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
275+
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
272276
; CHECK-NEXT: ret
273277
%cmp = icmp ne i1 %cc, 0
274278
%e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b

llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -593,8 +593,9 @@ entry:
593593
define <2 x i32> @fct20(ptr nocapture %sp0) {
594594
; CHECK-LABEL: fct20:
595595
; CHECK: // %bb.0: // %entry
596-
; CHECK-NEXT: ldr s0, [x0, #4]
597-
; CHECK-NEXT: mul.2s v0, v0, v0
596+
; CHECK-NEXT: ldr w8, [x0, #4]
597+
; CHECK-NEXT: mul w8, w8, w8
598+
; CHECK-NEXT: fmov s0, w8
598599
; CHECK-NEXT: ret
599600
entry:
600601
%addr = getelementptr i32, ptr %sp0, i64 1
@@ -607,8 +608,9 @@ entry:
607608
define <4 x i32> @fct21(ptr nocapture %sp0) {
608609
; CHECK-LABEL: fct21:
609610
; CHECK: // %bb.0: // %entry
610-
; CHECK-NEXT: ldr s0, [x0, #4]
611-
; CHECK-NEXT: mul.4s v0, v0, v0
611+
; CHECK-NEXT: ldr w8, [x0, #4]
612+
; CHECK-NEXT: mul w8, w8, w8
613+
; CHECK-NEXT: fmov s0, w8
612614
; CHECK-NEXT: ret
613615
entry:
614616
%addr = getelementptr i32, ptr %sp0, i64 1
@@ -703,8 +705,9 @@ entry:
703705
define <2 x i32> @fct28(ptr nocapture %sp0, i64 %offset) {
704706
; CHECK-LABEL: fct28:
705707
; CHECK: // %bb.0: // %entry
706-
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
707-
; CHECK-NEXT: mul.2s v0, v0, v0
708+
; CHECK-NEXT: ldr w8, [x0, x1, lsl #2]
709+
; CHECK-NEXT: mul w8, w8, w8
710+
; CHECK-NEXT: fmov s0, w8
708711
; CHECK-NEXT: ret
709712
entry:
710713
%addr = getelementptr i32, ptr %sp0, i64 %offset
@@ -717,8 +720,9 @@ entry:
717720
define <4 x i32> @fct29(ptr nocapture %sp0, i64 %offset) {
718721
; CHECK-LABEL: fct29:
719722
; CHECK: // %bb.0: // %entry
720-
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
721-
; CHECK-NEXT: mul.4s v0, v0, v0
723+
; CHECK-NEXT: ldr w8, [x0, x1, lsl #2]
724+
; CHECK-NEXT: mul w8, w8, w8
725+
; CHECK-NEXT: fmov s0, w8
722726
; CHECK-NEXT: ret
723727
entry:
724728
%addr = getelementptr i32, ptr %sp0, i64 %offset

llvm/test/CodeGen/AArch64/fdiv-combine.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,11 @@ define void @two_fdiv_double(double %D, double %a, double %b) #0 {
100100
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
101101
; CHECK-LABEL: splat_three_fdiv_4xfloat:
102102
; CHECK: // %bb.0:
103-
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
104-
; CHECK-NEXT: fmov v4.4s, #1.00000000
105-
; CHECK-NEXT: dup v0.4s, v0.s[0]
106-
; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
107-
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
108-
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
109-
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
103+
; CHECK-NEXT: fmov s4, #1.00000000
104+
; CHECK-NEXT: fdiv s4, s4, s0
105+
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.s[0]
106+
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.s[0]
107+
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.s[0]
110108
; CHECK-NEXT: b foo_3_4xf
111109
%D.ins = insertelement <4 x float> poison, float %D, i64 0
112110
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
@@ -120,11 +118,9 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
120118
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
121119
; CHECK-LABEL: splat_fdiv_v4f32:
122120
; CHECK: // %bb.0: // %entry
123-
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
124-
; CHECK-NEXT: fmov v2.4s, #1.00000000
125-
; CHECK-NEXT: dup v0.4s, v0.s[0]
126-
; CHECK-NEXT: fdiv v0.4s, v2.4s, v0.4s
127-
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
121+
; CHECK-NEXT: fmov s2, #1.00000000
122+
; CHECK-NEXT: fdiv s0, s2, s0
123+
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0]
128124
; CHECK-NEXT: ret
129125
entry:
130126
%D.ins = insertelement <4 x float> poison, float %D, i64 0

llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,28 +76,28 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
7676
; CHECK-NEXT: add x11, x11, x11, lsl #3
7777
; CHECK-NEXT: add x9, x9, x9, lsl #3
7878
; CHECK-NEXT: sub x8, x8, x11
79-
; CHECK-NEXT: sub x11, x13, x12
79+
; CHECK-NEXT: sub x12, x13, x12
8080
; CHECK-NEXT: fmov d0, x8
8181
; CHECK-NEXT: mov x8, #8589934591 // =0x1ffffffff
8282
; CHECK-NEXT: sub x9, x10, x9
83-
; CHECK-NEXT: asr x10, x11, #3
83+
; CHECK-NEXT: lsr x10, x12, #3
8484
; CHECK-NEXT: dup v1.2d, x8
8585
; CHECK-NEXT: mov v0.d[1], x9
86-
; CHECK-NEXT: add x9, x10, x11, lsr #63
86+
; CHECK-NEXT: add x9, x10, x12, lsr #63
8787
; CHECK-NEXT: add x8, x9, x9, lsl #3
8888
; CHECK-NEXT: adrp x9, .LCPI3_0
89-
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_0]
90-
; CHECK-NEXT: add x8, x12, x8
9189
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
92-
; CHECK-NEXT: fmov d3, x8
90+
; CHECK-NEXT: add x8, x2, x8
91+
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI3_0]
92+
; CHECK-NEXT: and x8, x8, #0x1ffffffff
93+
; CHECK-NEXT: dup v2.2d, x8
9394
; CHECK-NEXT: adrp x8, .LCPI3_1
94-
; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d
95-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
96-
; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
95+
; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d
96+
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1]
97+
; CHECK-NEXT: cmeq v1.2d, v2.2d, v1.2d
9798
; CHECK-NEXT: mvn v0.16b, v0.16b
98-
; CHECK-NEXT: cmeq v1.2d, v1.2d, v2.2d
99-
; CHECK-NEXT: xtn v0.2s, v0.2d
10099
; CHECK-NEXT: mvn v1.16b, v1.16b
100+
; CHECK-NEXT: xtn v0.2s, v0.2d
101101
; CHECK-NEXT: xtn v1.2s, v1.2d
102102
; CHECK-NEXT: mov w1, v0.s[1]
103103
; CHECK-NEXT: fmov w0, s0

llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,12 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) v
135135
; CHECK: // %bb.0:
136136
; CHECK-NEXT: tst w0, #0x1
137137
; CHECK-NEXT: csetm w8, ne
138+
; CHECK-NEXT: mvn w9, w8
138139
; CHECK-NEXT: dup v2.2s, w8
139-
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
140+
; CHECK-NEXT: dup v3.2s, w9
141+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
142+
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
143+
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
140144
; CHECK-NEXT: ret
141145
%sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2
142146
ret <2 x float> %sel
@@ -148,8 +152,12 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) v
148152
; CHECK: // %bb.0:
149153
; CHECK-NEXT: tst w0, #0x1
150154
; CHECK-NEXT: csetm w8, ne
155+
; CHECK-NEXT: mvn w9, w8
151156
; CHECK-NEXT: dup v2.4s, w8
152-
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
157+
; CHECK-NEXT: dup v3.4s, w9
158+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
159+
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
160+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
153161
; CHECK-NEXT: ret
154162
%sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2
155163
ret <4 x float> %sel
@@ -259,8 +267,12 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
259267
; CHECK: // %bb.0:
260268
; CHECK-NEXT: tst w0, #0x1
261269
; CHECK-NEXT: csetm x8, ne
270+
; CHECK-NEXT: mvn x9, x8
262271
; CHECK-NEXT: fmov d2, x8
263-
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
272+
; CHECK-NEXT: fmov d3, x9
273+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
274+
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
275+
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
264276
; CHECK-NEXT: ret
265277
%sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2
266278
ret <1 x double> %sel
@@ -272,8 +284,12 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
272284
; CHECK: // %bb.0:
273285
; CHECK-NEXT: tst w0, #0x1
274286
; CHECK-NEXT: csetm x8, ne
287+
; CHECK-NEXT: mvn x9, x8
275288
; CHECK-NEXT: dup v2.2d, x8
276-
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
289+
; CHECK-NEXT: dup v3.2d, x9
290+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
291+
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
292+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
277293
; CHECK-NEXT: ret
278294
%sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2
279295
ret <2 x double> %sel

llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,12 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
237237
; CHECK: // %bb.0:
238238
; CHECK-NEXT: tst w0, #0x1
239239
; CHECK-NEXT: csetm x8, ne
240+
; CHECK-NEXT: mvn x9, x8
240241
; CHECK-NEXT: fmov d2, x8
241-
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
242+
; CHECK-NEXT: fmov d3, x9
243+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
244+
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
245+
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
242246
; CHECK-NEXT: ret
243247
%sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
244248
ret <1 x double> %sel

llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,12 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_
254254
; CHECK: // %bb.0:
255255
; CHECK-NEXT: tst w0, #0x1
256256
; CHECK-NEXT: csetm w8, ne
257+
; CHECK-NEXT: mvn w9, w8
257258
; CHECK-NEXT: dup v2.2s, w8
258-
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
259+
; CHECK-NEXT: dup v3.2s, w9
260+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
261+
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
262+
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
259263
; CHECK-NEXT: ret
260264
%sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
261265
ret <2 x i32> %sel
@@ -267,8 +271,12 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_
267271
; CHECK: // %bb.0:
268272
; CHECK-NEXT: tst w0, #0x1
269273
; CHECK-NEXT: csetm w8, ne
274+
; CHECK-NEXT: mvn w9, w8
270275
; CHECK-NEXT: dup v2.4s, w8
271-
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
276+
; CHECK-NEXT: dup v3.4s, w9
277+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
278+
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
279+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
272280
; CHECK-NEXT: ret
273281
%sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
274282
ret <4 x i32> %sel
@@ -378,8 +386,12 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_
378386
; CHECK: // %bb.0:
379387
; CHECK-NEXT: tst w0, #0x1
380388
; CHECK-NEXT: csetm x8, ne
389+
; CHECK-NEXT: mvn x9, x8
381390
; CHECK-NEXT: fmov d2, x8
382-
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
391+
; CHECK-NEXT: fmov d3, x9
392+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
393+
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
394+
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
383395
; CHECK-NEXT: ret
384396
%sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
385397
ret <1 x i64> %sel
@@ -391,8 +403,12 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_
391403
; CHECK: // %bb.0:
392404
; CHECK-NEXT: tst w0, #0x1
393405
; CHECK-NEXT: csetm x8, ne
406+
; CHECK-NEXT: mvn x9, x8
394407
; CHECK-NEXT: dup v2.2d, x8
395-
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
408+
; CHECK-NEXT: dup v3.2d, x9
409+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
410+
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
411+
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
396412
; CHECK-NEXT: ret
397413
%sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
398414
ret <2 x i64> %sel

llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,8 +349,12 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) v
349349
; CHECK: // %bb.0:
350350
; CHECK-NEXT: tst w0, #0x1
351351
; CHECK-NEXT: csetm x8, ne
352+
; CHECK-NEXT: mvn x9, x8
352353
; CHECK-NEXT: fmov d2, x8
353-
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
354+
; CHECK-NEXT: fmov d3, x9
355+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
356+
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
357+
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
354358
; CHECK-NEXT: ret
355359
%sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
356360
ret <1 x i64> %sel

llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,8 @@ define void @undef_hi_op_v2f16(half %arg0) {
244244
; GFX9-LABEL: undef_hi_op_v2f16:
245245
; GFX9: ; %bb.0:
246246
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247-
; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
247+
; GFX9-NEXT: v_add_f16_e32 v0, 1.0, v0
248+
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v0
248249
; GFX9-NEXT: ;;#ASMSTART
249250
; GFX9-NEXT: ; use v0
250251
; GFX9-NEXT: ;;#ASMEND
@@ -254,7 +255,8 @@ define void @undef_hi_op_v2f16(half %arg0) {
254255
; GFX8: ; %bb.0:
255256
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256257
; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
257-
; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
258+
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
259+
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
258260
; GFX8-NEXT: ;;#ASMSTART
259261
; GFX8-NEXT: ; use v0
260262
; GFX8-NEXT: ;;#ASMEND
@@ -269,8 +271,9 @@ define void @undef_hi_op_v2i16(i16 %arg0) {
269271
; GFX9-LABEL: undef_hi_op_v2i16:
270272
; GFX9: ; %bb.0:
271273
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272-
; GFX9-NEXT: s_movk_i32 s4, 0x63
273-
; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
274+
; GFX9-NEXT: v_add_u16_e32 v0, 0x63, v0
275+
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
276+
; GFX9-NEXT: v_perm_b32 v0, v0, v0, s4
274277
; GFX9-NEXT: ;;#ASMSTART
275278
; GFX9-NEXT: ; use v0
276279
; GFX9-NEXT: ;;#ASMEND
@@ -280,6 +283,8 @@ define void @undef_hi_op_v2i16(i16 %arg0) {
280283
; GFX8: ; %bb.0:
281284
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282285
; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0
286+
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
287+
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
283288
; GFX8-NEXT: ;;#ASMSTART
284289
; GFX8-NEXT: ; use v0
285290
; GFX8-NEXT: ;;#ASMEND

0 commit comments

Comments
 (0)