Skip to content

[DAGCombiner] Treat extracts from build_vectors that are splats as free #65773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26196,20 +26196,20 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
EVT EltVT = VT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();

// TODO: Remove/replace the extract cost check? If the elements are available
// as scalars, then there may be no extract cost. Should we ask if
// inserting a scalar back into a vector is cheap instead?
int Index0, Index1;
SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
// Extract element from splat_vector should be free.
// TODO: use DAG.isSplatValue instead?
bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
N1.getOpcode() == ISD::SPLAT_VECTOR;
// Extracting from a shuffle_vector might cost something, but extracting from
// a splat_vector or a splatted build_vector should be free since the operands
// are scalars anyway.
bool IsExtractFree = (N0.getOpcode() == ISD::SPLAT_VECTOR ||
N0.getOpcode() == ISD::BUILD_VECTOR) &&
(N1.getOpcode() == ISD::SPLAT_VECTOR ||
N1.getOpcode() == ISD::BUILD_VECTOR);
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
!(IsExtractFree || TLI.isExtractVecEltCheap(VT, Index0)) ||
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();

Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AArch64/active_lane_mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -473,16 +473,16 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v2i1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0x0000ff000000ff
; CHECK-NEXT: dup v1.2s, w0
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: movi d2, #0x0000ff000000ff
; CHECK-NEXT: dup v0.2s, w8
; CHECK-NEXT: adrp x8, .LCPI27_0
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI27_0]
; CHECK-NEXT: dup v3.2s, w1
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
; CHECK-NEXT: umin v1.2s, v1.2s, v0.2s
; CHECK-NEXT: and v0.8b, v3.8b, v0.8b
; CHECK-NEXT: cmhi v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0]
; CHECK-NEXT: and w8, w1, #0xff
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: dup v1.2s, w8
; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s
; CHECK-NEXT: cmhi v0.2s, v1.2s, v0.2s
; CHECK-NEXT: ret
%active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
ret <2 x i1> %active.lane.mask
Expand Down
6 changes: 5 additions & 1 deletion llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,12 @@ define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: mvn w9, w8
; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: dup v3.2s, w9
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%cmp = icmp ne i1 %cc, 0
%e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
Expand Down
20 changes: 12 additions & 8 deletions llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
Original file line number Diff line number Diff line change
Expand Up @@ -593,8 +593,9 @@ entry:
define <2 x i32> @fct20(ptr nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr s0, [x0, #4]
; CHECK-NEXT: mul.2s v0, v0, v0
; CHECK-NEXT: ldr w8, [x0, #4]
; CHECK-NEXT: mul w8, w8, w8
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
entry:
%addr = getelementptr i32, ptr %sp0, i64 1
Expand All @@ -607,8 +608,9 @@ entry:
define <4 x i32> @fct21(ptr nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr s0, [x0, #4]
; CHECK-NEXT: mul.4s v0, v0, v0
; CHECK-NEXT: ldr w8, [x0, #4]
; CHECK-NEXT: mul w8, w8, w8
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
entry:
%addr = getelementptr i32, ptr %sp0, i64 1
Expand Down Expand Up @@ -703,8 +705,9 @@ entry:
define <2 x i32> @fct28(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct28:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v0, v0
; CHECK-NEXT: ldr w8, [x0, x1, lsl #2]
; CHECK-NEXT: mul w8, w8, w8
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
entry:
%addr = getelementptr i32, ptr %sp0, i64 %offset
Expand All @@ -717,8 +720,9 @@ entry:
define <4 x i32> @fct29(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct29:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v0, v0
; CHECK-NEXT: ldr w8, [x0, x1, lsl #2]
; CHECK-NEXT: mul w8, w8, w8
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
entry:
%addr = getelementptr i32, ptr %sp0, i64 %offset
Expand Down
20 changes: 8 additions & 12 deletions llvm/test/CodeGen/AArch64/fdiv-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,11 @@ define void @two_fdiv_double(double %D, double %a, double %b) #0 {
define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; CHECK-LABEL: splat_three_fdiv_4xfloat:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: fmov v4.4s, #1.00000000
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
; CHECK-NEXT: fmov s4, #1.00000000
; CHECK-NEXT: fdiv s4, s4, s0
; CHECK-NEXT: fmul v0.4s, v1.4s, v4.s[0]
; CHECK-NEXT: fmul v1.4s, v2.4s, v4.s[0]
; CHECK-NEXT: fmul v2.4s, v3.4s, v4.s[0]
; CHECK-NEXT: b foo_3_4xf
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
Expand All @@ -120,11 +118,9 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
; CHECK-LABEL: splat_fdiv_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: fmov v2.4s, #1.00000000
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: fdiv v0.4s, v2.4s, v0.4s
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT: fmov s2, #1.00000000
; CHECK-NEXT: fdiv s0, s2, s0
; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0]
; CHECK-NEXT: ret
entry:
%D.ins = insertelement <4 x float> poison, float %D, i64 0
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,28 +76,28 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; CHECK-NEXT: add x11, x11, x11, lsl #3
; CHECK-NEXT: add x9, x9, x9, lsl #3
; CHECK-NEXT: sub x8, x8, x11
; CHECK-NEXT: sub x11, x13, x12
; CHECK-NEXT: sub x12, x13, x12
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov x8, #8589934591 // =0x1ffffffff
; CHECK-NEXT: sub x9, x10, x9
; CHECK-NEXT: asr x10, x11, #3
; CHECK-NEXT: lsr x10, x12, #3
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: add x9, x10, x11, lsr #63
; CHECK-NEXT: add x9, x10, x12, lsr #63
; CHECK-NEXT: add x8, x9, x9, lsl #3
; CHECK-NEXT: adrp x9, .LCPI3_0
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_0]
; CHECK-NEXT: add x8, x12, x8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: add x8, x2, x8
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI3_0]
; CHECK-NEXT: and x8, x8, #0x1ffffffff
; CHECK-NEXT: dup v2.2d, x8
; CHECK-NEXT: adrp x8, .LCPI3_1
; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: cmeq v1.2d, v2.2d, v1.2d
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: cmeq v1.2d, v1.2d, v2.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: mvn v1.16b, v1.16b
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: xtn v1.2s, v1.2d
; CHECK-NEXT: mov w1, v0.s[1]
; CHECK-NEXT: fmov w0, s0
Expand Down
24 changes: 20 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,12 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) v
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: mvn w9, w8
; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: dup v3.2s, w9
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2
ret <2 x float> %sel
Expand All @@ -148,8 +152,12 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) v
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: mvn w9, w8
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: dup v3.4s, w9
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2
ret <4 x float> %sel
Expand Down Expand Up @@ -259,8 +267,12 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov d3, x9
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2
ret <1 x double> %sel
Expand All @@ -272,8 +284,12 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: dup v2.2d, x8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: dup v3.2d, x9
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2
ret <2 x double> %sel
Expand Down
6 changes: 5 additions & 1 deletion llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,12 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov d3, x9
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
ret <1 x double> %sel
Expand Down
24 changes: 20 additions & 4 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,12 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: mvn w9, w8
; CHECK-NEXT: dup v2.2s, w8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: dup v3.2s, w9
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
ret <2 x i32> %sel
Expand All @@ -267,8 +271,12 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: mvn w9, w8
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: dup v3.4s, w9
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
ret <4 x i32> %sel
Expand Down Expand Up @@ -378,8 +386,12 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov d3, x9
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
ret <1 x i64> %sel
Expand All @@ -391,8 +403,12 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: dup v2.2d, x8
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: dup v3.2d, x9
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
ret <2 x i64> %sel
Expand Down
6 changes: 5 additions & 1 deletion llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -349,8 +349,12 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) v
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov d3, x9
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
ret <1 x i64> %sel
Expand Down
13 changes: 9 additions & 4 deletions llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ define void @undef_hi_op_v2f16(half %arg0) {
; GFX9-LABEL: undef_hi_op_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
; GFX9-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX9-NEXT: v_pack_b32_f16 v0, v0, v0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are all regressions

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I've marked this patch as a draft since I just want to share my findings. I'm hoping others might have some ideas for additional patterns that can be matched to mitigate all these regressions.

; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
Expand All @@ -254,7 +255,8 @@ define void @undef_hi_op_v2f16(half %arg0) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_e32 v0, 1.0, v0
; GFX8-NEXT: v_or_b32_e32 v0, 0x7e000000, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
Expand All @@ -269,8 +271,9 @@ define void @undef_hi_op_v2i16(i16 %arg0) {
; GFX9-LABEL: undef_hi_op_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 s4, 0x63
; GFX9-NEXT: v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
; GFX9-NEXT: v_add_u16_e32 v0, 0x63, v0
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v0, v0, s4
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v0
; GFX9-NEXT: ;;#ASMEND
Expand All @@ -280,6 +283,8 @@ define void @undef_hi_op_v2i16(i16 %arg0) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_e32 v0, 0x63, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v0
; GFX8-NEXT: ;;#ASMEND
Expand Down
Loading