Skip to content

Commit f734a0c

Browse files
author
Abhinav Garg
committed
Address review comments: Scalarize v2s16 for uniform operation
1 parent 704ffce commit f734a0c

File tree

5 files changed

+99
-131
lines changed

5 files changed

+99
-131
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,24 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
616616
MI.eraseFromParent();
617617
}
618618

619+
/// Lower a V2S16 instruction with two source operands by scalarizing it into
/// two S16 instructions on the SGPR bank and merging the results back into
/// the original destination register.
///
/// Each source (operands 1 and 2) is split with unpackAExt() into a low and a
/// high piece, each piece is truncated to S16, the original opcode is
/// re-emitted once per half with the original MI flags, and the two S16
/// results are merged into operand 0. \p MI is erased afterwards.
///
/// NOTE(review): unpackAExt() is defined outside this block; judging by the
/// naming it yields the two halves any-extended to 32 bits — confirm.
void RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  // Query the type inside the assert: a local used only by the assert would
  // trigger an unused-variable warning when assertions are compiled out.
  assert(MRI.getType(Dst) == V2S16 && "lowerSplitTo16 expects a V2S16 result");

  // Split both sources first so the instruction order matches the sequence
  // unpack(src0), unpack(src1), truncs, per-half ops, merge.
  auto [Src0Lo32, Src0Hi32] = unpackAExt(MI.getOperand(1).getReg());
  auto [Src1Lo32, Src1Hi32] = unpackAExt(MI.getOperand(2).getReg());

  // Capture everything needed from MI before building replacements.
  const unsigned Opc = MI.getOpcode();
  const auto Flags = MI.getFlags();

  // Narrow every half to S16 on the SGPR register bank.
  auto Src0Lo = B.buildTrunc(SgprRB_S16, Src0Lo32);
  auto Src0Hi = B.buildTrunc(SgprRB_S16, Src0Hi32);
  auto Src1Lo = B.buildTrunc(SgprRB_S16, Src1Lo32);
  auto Src1Hi = B.buildTrunc(SgprRB_S16, Src1Hi32);

  // Re-emit the original operation once per half, preserving the MI flags.
  auto Lo = B.buildInstr(Opc, {SgprRB_S16}, {Src0Lo, Src1Lo}, Flags);
  auto Hi = B.buildInstr(Opc, {SgprRB_S16}, {Src0Hi, Src1Hi}, Flags);

  // Reassemble the result directly into the original destination register.
  B.buildMergeLikeInstr(Dst, {Lo.getReg(0), Hi.getReg(0)});
  MI.eraseFromParent();
}
636+
619637
void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
620638
Register Dst = MI.getOperand(0).getReg();
621639
LLT DstTy = MRI.getType(Dst);
@@ -688,6 +706,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
688706
return lowerUnpackBitShift(MI);
689707
case UnpackMinMax:
690708
return lowerUnpackMinMax(MI);
709+
case ScalarizeToS16:
710+
return lowerSplitTo16(MI);
691711
case Ext32To64: {
692712
const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
693713
MachineInstrBuilder Hi;

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class RegBankLegalizeHelper {
7272
static constexpr LLT P6 = LLT::pointer(6, 32);
7373

7474
MachineRegisterInfo::VRegAttrs SgprRB_S32 = {SgprRB, S32};
75+
MachineRegisterInfo::VRegAttrs SgprRB_S16 = {SgprRB, S16};
7576
MachineRegisterInfo::VRegAttrs VgprRB_S32 = {VgprRB, S32};
7677
MachineRegisterInfo::VRegAttrs VccRB_S1 = {VccRB, S1};
7778

@@ -121,6 +122,7 @@ class RegBankLegalizeHelper {
121122
void lowerV_BFE(MachineInstr &MI);
122123
void lowerS_BFE(MachineInstr &MI);
123124
void lowerSplitTo32(MachineInstr &MI);
125+
void lowerSplitTo16(MachineInstr &MI);
124126
void lowerSplitTo32Select(MachineInstr &MI);
125127
void lowerSplitTo32SExtInReg(MachineInstr &MI);
126128
void lowerUnpackMinMax(MachineInstr &MI);

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
914914
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
915915
.Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
916916
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
917-
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
917+
.Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
918+
.Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
919+
hasSALUFloat)
918920
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
919921
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
920922
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32}}});

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ enum LoweringMethodID {
221221
V_BFE,
222222
VgprToVccCopy,
223223
SplitTo32,
224+
ScalarizeToS16,
224225
SplitTo32Select,
225226
SplitTo32SExtInReg,
226227
Ext32To64,
Lines changed: 73 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1,90 +1,67 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
4-
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
5-
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
6-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s
7-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s
8-
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s
9-
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s
2+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
3+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
4+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
5+
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
106

117
define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) {
12-
; GFX11-SDAG-FAKE16-LABEL: fadd_s16_uniform:
13-
; GFX11-SDAG-FAKE16: ; %bb.0:
14-
; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
15-
; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
16-
;
17-
; GFX11-SDAG-TRUE16-LABEL: fadd_s16_uniform:
18-
; GFX11-SDAG-TRUE16: ; %bb.0:
19-
; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
20-
; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
21-
;
22-
; GFX11-GISEL-FAKE16-LABEL: fadd_s16_uniform:
23-
; GFX11-GISEL-FAKE16: ; %bb.0:
24-
; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
25-
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
26-
; GFX11-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
27-
; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0
28-
; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
29-
;
30-
; GFX11-GISEL-TRUE16-LABEL: fadd_s16_uniform:
31-
; GFX11-GISEL-TRUE16: ; %bb.0:
32-
; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
33-
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
34-
; GFX11-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
35-
; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s0
36-
; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
8+
; GFX11-FAKE16-LABEL: fadd_s16_uniform:
9+
; GFX11-FAKE16: ; %bb.0:
10+
; GFX11-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1
11+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
12+
; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0
13+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
14+
; GFX11-FAKE16-NEXT: ; return to shader part epilog
15+
;
16+
; GFX11-TRUE16-LABEL: fadd_s16_uniform:
17+
; GFX11-TRUE16: ; %bb.0:
18+
; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1
19+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
20+
; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0
21+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s0
22+
; GFX11-TRUE16-NEXT: ; return to shader part epilog
3723
;
3824
; GFX12-LABEL: fadd_s16_uniform:
3925
; GFX12: ; %bb.0:
4026
; GFX12-NEXT: s_add_f16 s0, s0, s1
4127
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
4228
; GFX12-NEXT: v_mov_b32_e32 v0, s0
4329
; GFX12-NEXT: ; return to shader part epilog
30+
; -LABEL: fadd_s16_uniform:
31+
; : ; %bb.0:
32+
; -NEXT: v_add_f16_e64 v0.l, s0, s1
33+
; -NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
34+
; -NEXT: v_readfirstlane_b32 s0, v0
35+
; -NEXT: v_mov_b32_e32 v0, s0
36+
; -NEXT: ; return to shader part epilog
4437
%fadd = fadd half %a, %b
4538
ret half %fadd
4639
}
4740

4841
define amdgpu_ps half @fadd_s16_div(half %a, half %b) {
49-
; GFX11-SDAG-FAKE16-LABEL: fadd_s16_div:
50-
; GFX11-SDAG-FAKE16: ; %bb.0:
51-
; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
52-
; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog
53-
;
54-
; GFX11-SDAG-TRUE16-LABEL: fadd_s16_div:
55-
; GFX11-SDAG-TRUE16: ; %bb.0:
56-
; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
57-
; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog
58-
;
59-
; GFX11-GISEL-FAKE16-LABEL: fadd_s16_div:
60-
; GFX11-GISEL-FAKE16: ; %bb.0:
61-
; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
62-
; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog
63-
;
64-
; GFX11-GISEL-TRUE16-LABEL: fadd_s16_div:
65-
; GFX11-GISEL-TRUE16: ; %bb.0:
66-
; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
67-
; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog
68-
;
69-
; GFX12-SDAG-FAKE16-LABEL: fadd_s16_div:
70-
; GFX12-SDAG-FAKE16: ; %bb.0:
71-
; GFX12-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
72-
; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog
73-
;
74-
; GFX12-SDAG-TRUE16-LABEL: fadd_s16_div:
75-
; GFX12-SDAG-TRUE16: ; %bb.0:
76-
; GFX12-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
77-
; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog
78-
;
79-
; GFX12-GISEL-FAKE16-LABEL: fadd_s16_div:
80-
; GFX12-GISEL-FAKE16: ; %bb.0:
81-
; GFX12-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
82-
; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog
83-
;
84-
; GFX12-GISEL-TRUE16-LABEL: fadd_s16_div:
85-
; GFX12-GISEL-TRUE16: ; %bb.0:
86-
; GFX12-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
87-
; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog
42+
; GFX11-FAKE16-LABEL: fadd_s16_div:
43+
; GFX11-FAKE16: ; %bb.0:
44+
; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
45+
; GFX11-FAKE16-NEXT: ; return to shader part epilog
46+
;
47+
; GFX11-TRUE16-LABEL: fadd_s16_div:
48+
; GFX11-TRUE16: ; %bb.0:
49+
; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
50+
; GFX11-TRUE16-NEXT: ; return to shader part epilog
51+
;
52+
; GFX12-FAKE16-LABEL: fadd_s16_div:
53+
; GFX12-FAKE16: ; %bb.0:
54+
; GFX12-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1
55+
; GFX12-FAKE16-NEXT: ; return to shader part epilog
56+
;
57+
; GFX12-TRUE16-LABEL: fadd_s16_div:
58+
; GFX12-TRUE16: ; %bb.0:
59+
; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
60+
; GFX12-TRUE16-NEXT: ; return to shader part epilog
61+
; -LABEL: fadd_s16_div:
62+
; : ; %bb.0:
63+
; -NEXT: v_add_f16_e32 v0.l, v0.l, v1.l
64+
; -NEXT: ; return to shader part epilog
8865
%fadd = fadd half %a, %b
8966
ret half %fadd
9067
}
@@ -155,92 +132,58 @@ define amdgpu_ps double @fadd_s64_div(double %a, double %b) {
155132
ret double %fadd
156133
}
157134

158-
define <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
135+
define amdgpu_ps <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
159136
; GFX11-LABEL: fadd_v2s16_uniform:
160137
; GFX11: ; %bb.0:
161-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162138
; GFX11-NEXT: v_pk_add_f16 v0, s0, s1
163-
; GFX11-NEXT: s_setpc_b64 s[30:31]
139+
; GFX11-NEXT: ; return to shader part epilog
164140
;
165141
; GFX12-LABEL: fadd_v2s16_uniform:
166142
; GFX12: ; %bb.0:
167-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
168-
; GFX12-NEXT: s_wait_expcnt 0x0
169-
; GFX12-NEXT: s_wait_samplecnt 0x0
170-
; GFX12-NEXT: s_wait_bvhcnt 0x0
171-
; GFX12-NEXT: s_wait_kmcnt 0x0
172-
; GFX12-NEXT: v_pk_add_f16 v0, s0, s1
173-
; GFX12-NEXT: s_setpc_b64 s[30:31]
143+
; GFX12-NEXT: s_lshr_b32 s2, s0, 16
144+
; GFX12-NEXT: s_lshr_b32 s3, s1, 16
145+
; GFX12-NEXT: s_add_f16 s0, s0, s1
146+
; GFX12-NEXT: s_add_f16 s1, s2, s3
147+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
148+
; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1
149+
; GFX12-NEXT: v_mov_b32_e32 v0, s0
150+
; GFX12-NEXT: ; return to shader part epilog
174151
%fadd = fadd <2 x half> %a, %b
175152
ret <2 x half> %fadd
176153
}
177154

178-
define <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
179-
; GFX11-LABEL: fadd_v2s16_div:
180-
; GFX11: ; %bb.0:
181-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182-
; GFX11-NEXT: v_pk_add_f16 v0, v0, v1
183-
; GFX11-NEXT: s_setpc_b64 s[30:31]
184-
;
185-
; GFX12-LABEL: fadd_v2s16_div:
186-
; GFX12: ; %bb.0:
187-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
188-
; GFX12-NEXT: s_wait_expcnt 0x0
189-
; GFX12-NEXT: s_wait_samplecnt 0x0
190-
; GFX12-NEXT: s_wait_bvhcnt 0x0
191-
; GFX12-NEXT: s_wait_kmcnt 0x0
192-
; GFX12-NEXT: v_pk_add_f16 v0, v0, v1
193-
; GFX12-NEXT: s_setpc_b64 s[30:31]
155+
define amdgpu_ps <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) {
156+
; GCN-LABEL: fadd_v2s16_div:
157+
; GCN: ; %bb.0:
158+
; GCN-NEXT: v_pk_add_f16 v0, v0, v1
159+
; GCN-NEXT: ; return to shader part epilog
194160
%fadd = fadd <2 x half> %a, %b
195161
ret <2 x half> %fadd
196162
}
197163

198-
define <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
164+
define amdgpu_ps <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
199165
; GFX11-LABEL: fadd_v2s32_uniform:
200166
; GFX11: ; %bb.0:
201-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202167
; GFX11-NEXT: v_add_f32_e64 v0, s0, s2
203168
; GFX11-NEXT: v_add_f32_e64 v1, s1, s3
204-
; GFX11-NEXT: s_setpc_b64 s[30:31]
169+
; GFX11-NEXT: ; return to shader part epilog
205170
;
206171
; GFX12-LABEL: fadd_v2s32_uniform:
207172
; GFX12: ; %bb.0:
208-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
209-
; GFX12-NEXT: s_wait_expcnt 0x0
210-
; GFX12-NEXT: s_wait_samplecnt 0x0
211-
; GFX12-NEXT: s_wait_bvhcnt 0x0
212-
; GFX12-NEXT: s_wait_kmcnt 0x0
213173
; GFX12-NEXT: s_add_f32 s0, s0, s2
214174
; GFX12-NEXT: s_add_f32 s1, s1, s3
215-
; GFX12-NEXT: s_wait_alu 0xfffe
216-
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
175+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
217176
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
218-
; GFX12-NEXT: s_setpc_b64 s[30:31]
177+
; GFX12-NEXT: ; return to shader part epilog
219178
%fadd = fadd <2 x float> %a, %b
220179
ret <2 x float> %fadd
221180
}
222181

223-
define <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
224-
; GFX11-LABEL: fadd_v2s32_div:
225-
; GFX11: ; %bb.0:
226-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227-
; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
228-
; GFX11-NEXT: s_setpc_b64 s[30:31]
229-
;
230-
; GFX12-LABEL: fadd_v2s32_div:
231-
; GFX12: ; %bb.0:
232-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
233-
; GFX12-NEXT: s_wait_expcnt 0x0
234-
; GFX12-NEXT: s_wait_samplecnt 0x0
235-
; GFX12-NEXT: s_wait_bvhcnt 0x0
236-
; GFX12-NEXT: s_wait_kmcnt 0x0
237-
; GFX12-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
238-
; GFX12-NEXT: s_setpc_b64 s[30:31]
182+
define amdgpu_ps <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) {
183+
; GCN-LABEL: fadd_v2s32_div:
184+
; GCN: ; %bb.0:
185+
; GCN-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3
186+
; GCN-NEXT: ; return to shader part epilog
239187
%fadd = fadd <2 x float> %a, %b
240188
ret <2 x float> %fadd
241189
}
242-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
243-
; GFX11-GISEL: {{.*}}
244-
; GFX11-SDAG: {{.*}}
245-
; GFX12-GISEL: {{.*}}
246-
; GFX12-SDAG: {{.*}}

0 commit comments

Comments
 (0)