|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
2 | | -; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s |
3 | | -; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s |
4 | | -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s |
5 | | -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
6 | | -; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-FAKE16 %s |
7 | | -; RUN: llc -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-SDAG,GFX12-SDAG-TRUE16 %s |
8 | | -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-FAKE16 %s |
9 | | -; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-GISEL,GFX12-GISEL-TRUE16 %s |
| 2 | +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s |
| 3 | +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s |
| 4 | +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s |
| 5 | +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s |
10 | 6 |
|
; fadd of two `half` operands that are both marked `inreg` in an amdgpu_ps function.
; Expected code per the checks below: GFX11 issues v_add_f16_e64 on s0/s1 and then
; moves the result through v_readfirstlane_b32 and v_mov_b32_e32 (FAKE16 and TRUE16
; differ only in the v0 vs. v0.l destination); GFX12 issues s_add_f16 and v_mov_b32_e32.
11 | 7 | define amdgpu_ps half @fadd_s16_uniform(half inreg %a, half inreg %b) { |
12 | | -; GFX11-SDAG-FAKE16-LABEL: fadd_s16_uniform: |
13 | | -; GFX11-SDAG-FAKE16: ; %bb.0: |
14 | | -; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1 |
15 | | -; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog |
16 | | -; |
17 | | -; GFX11-SDAG-TRUE16-LABEL: fadd_s16_uniform: |
18 | | -; GFX11-SDAG-TRUE16: ; %bb.0: |
19 | | -; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1 |
20 | | -; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog |
21 | | -; |
22 | | -; GFX11-GISEL-FAKE16-LABEL: fadd_s16_uniform: |
23 | | -; GFX11-GISEL-FAKE16: ; %bb.0: |
24 | | -; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1 |
25 | | -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
26 | | -; GFX11-GISEL-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 |
27 | | -; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s0 |
28 | | -; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog |
29 | | -; |
30 | | -; GFX11-GISEL-TRUE16-LABEL: fadd_s16_uniform: |
31 | | -; GFX11-GISEL-TRUE16: ; %bb.0: |
32 | | -; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1 |
33 | | -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
34 | | -; GFX11-GISEL-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 |
35 | | -; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s0 |
36 | | -; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog |
| 8 | +; GFX11-FAKE16-LABEL: fadd_s16_uniform: |
| 9 | +; GFX11-FAKE16: ; %bb.0: |
| 10 | +; GFX11-FAKE16-NEXT: v_add_f16_e64 v0, s0, s1 |
| 11 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 12 | +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 |
| 13 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0 |
| 14 | +; GFX11-FAKE16-NEXT: ; return to shader part epilog |
| 15 | +; |
| 16 | +; GFX11-TRUE16-LABEL: fadd_s16_uniform: |
| 17 | +; GFX11-TRUE16: ; %bb.0: |
| 18 | +; GFX11-TRUE16-NEXT: v_add_f16_e64 v0.l, s0, s1 |
| 19 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 20 | +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 |
| 21 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s0 |
| 22 | +; GFX11-TRUE16-NEXT: ; return to shader part epilog |
37 | 23 | ; |
38 | 24 | ; GFX12-LABEL: fadd_s16_uniform: |
39 | 25 | ; GFX12: ; %bb.0: |
40 | 26 | ; GFX12-NEXT: s_add_f16 s0, s0, s1 |
41 | 27 | ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
42 | 28 | ; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
43 | 29 | ; GFX12-NEXT: ; return to shader part epilog |
44 | 37 | %fadd = fadd half %a, %b |
45 | 38 | ret half %fadd |
46 | 39 | } |
47 | 40 |
|
; fadd of two `half` operands that are NOT marked `inreg` in an amdgpu_ps function
; (presumably "div" = divergent; the checks show the operands in v0/v1).
; Expected code per the checks below: a single v_add_f16_e32 on both GFX11 and GFX12;
; FAKE16 uses v0/v1, TRUE16 uses the .l halves (v0.l, v1.l).
48 | 41 | define amdgpu_ps half @fadd_s16_div(half %a, half %b) { |
49 | | -; GFX11-SDAG-FAKE16-LABEL: fadd_s16_div: |
50 | | -; GFX11-SDAG-FAKE16: ; %bb.0: |
51 | | -; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 |
52 | | -; GFX11-SDAG-FAKE16-NEXT: ; return to shader part epilog |
53 | | -; |
54 | | -; GFX11-SDAG-TRUE16-LABEL: fadd_s16_div: |
55 | | -; GFX11-SDAG-TRUE16: ; %bb.0: |
56 | | -; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l |
57 | | -; GFX11-SDAG-TRUE16-NEXT: ; return to shader part epilog |
58 | | -; |
59 | | -; GFX11-GISEL-FAKE16-LABEL: fadd_s16_div: |
60 | | -; GFX11-GISEL-FAKE16: ; %bb.0: |
61 | | -; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 |
62 | | -; GFX11-GISEL-FAKE16-NEXT: ; return to shader part epilog |
63 | | -; |
64 | | -; GFX11-GISEL-TRUE16-LABEL: fadd_s16_div: |
65 | | -; GFX11-GISEL-TRUE16: ; %bb.0: |
66 | | -; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l |
67 | | -; GFX11-GISEL-TRUE16-NEXT: ; return to shader part epilog |
68 | | -; |
69 | | -; GFX12-SDAG-FAKE16-LABEL: fadd_s16_div: |
70 | | -; GFX12-SDAG-FAKE16: ; %bb.0: |
71 | | -; GFX12-SDAG-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 |
72 | | -; GFX12-SDAG-FAKE16-NEXT: ; return to shader part epilog |
73 | | -; |
74 | | -; GFX12-SDAG-TRUE16-LABEL: fadd_s16_div: |
75 | | -; GFX12-SDAG-TRUE16: ; %bb.0: |
76 | | -; GFX12-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l |
77 | | -; GFX12-SDAG-TRUE16-NEXT: ; return to shader part epilog |
78 | | -; |
79 | | -; GFX12-GISEL-FAKE16-LABEL: fadd_s16_div: |
80 | | -; GFX12-GISEL-FAKE16: ; %bb.0: |
81 | | -; GFX12-GISEL-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 |
82 | | -; GFX12-GISEL-FAKE16-NEXT: ; return to shader part epilog |
83 | | -; |
84 | | -; GFX12-GISEL-TRUE16-LABEL: fadd_s16_div: |
85 | | -; GFX12-GISEL-TRUE16: ; %bb.0: |
86 | | -; GFX12-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l |
87 | | -; GFX12-GISEL-TRUE16-NEXT: ; return to shader part epilog |
| 42 | +; GFX11-FAKE16-LABEL: fadd_s16_div: |
| 43 | +; GFX11-FAKE16: ; %bb.0: |
| 44 | +; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 |
| 45 | +; GFX11-FAKE16-NEXT: ; return to shader part epilog |
| 46 | +; |
| 47 | +; GFX11-TRUE16-LABEL: fadd_s16_div: |
| 48 | +; GFX11-TRUE16: ; %bb.0: |
| 49 | +; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l |
| 50 | +; GFX11-TRUE16-NEXT: ; return to shader part epilog |
| 51 | +; |
| 52 | +; GFX12-FAKE16-LABEL: fadd_s16_div: |
| 53 | +; GFX12-FAKE16: ; %bb.0: |
| 54 | +; GFX12-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 |
| 55 | +; GFX12-FAKE16-NEXT: ; return to shader part epilog |
| 56 | +; |
| 57 | +; GFX12-TRUE16-LABEL: fadd_s16_div: |
| 58 | +; GFX12-TRUE16: ; %bb.0: |
| 59 | +; GFX12-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v1.l |
| 60 | +; GFX12-TRUE16-NEXT: ; return to shader part epilog |
88 | 65 | %fadd = fadd half %a, %b |
89 | 66 | ret half %fadd |
90 | 67 | } |
@@ -155,92 +132,58 @@ define amdgpu_ps double @fadd_s64_div(double %a, double %b) { |
155 | 132 | ret double %fadd |
156 | 133 | } |
157 | 134 |
|
; fadd of two `<2 x half>` operands that are both marked `inreg`; this change switches
; the function to the amdgpu_ps calling convention.
; Expected code per the checks below: GFX11 issues one v_pk_add_f16 on s0/s1; GFX12
; shifts out the high halves with s_lshr_b32, adds each half with s_add_f16, repacks
; with s_pack_ll_b32_b16 and copies the result to v0.
158 | | -define <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) { |
| 135 | +define amdgpu_ps <2 x half> @fadd_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) { |
159 | 136 | ; GFX11-LABEL: fadd_v2s16_uniform: |
160 | 137 | ; GFX11: ; %bb.0: |
161 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
162 | 138 | ; GFX11-NEXT: v_pk_add_f16 v0, s0, s1 |
163 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 139 | +; GFX11-NEXT: ; return to shader part epilog |
164 | 140 | ; |
165 | 141 | ; GFX12-LABEL: fadd_v2s16_uniform: |
166 | 142 | ; GFX12: ; %bb.0: |
167 | | -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
168 | | -; GFX12-NEXT: s_wait_expcnt 0x0 |
169 | | -; GFX12-NEXT: s_wait_samplecnt 0x0 |
170 | | -; GFX12-NEXT: s_wait_bvhcnt 0x0 |
171 | | -; GFX12-NEXT: s_wait_kmcnt 0x0 |
172 | | -; GFX12-NEXT: v_pk_add_f16 v0, s0, s1 |
173 | | -; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 143 | +; GFX12-NEXT: s_lshr_b32 s2, s0, 16 |
| 144 | +; GFX12-NEXT: s_lshr_b32 s3, s1, 16 |
| 145 | +; GFX12-NEXT: s_add_f16 s0, s0, s1 |
| 146 | +; GFX12-NEXT: s_add_f16 s1, s2, s3 |
| 147 | +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) |
| 148 | +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1 |
| 149 | +; GFX12-NEXT: v_mov_b32_e32 v0, s0 |
| 150 | +; GFX12-NEXT: ; return to shader part epilog |
174 | 151 | %fadd = fadd <2 x half> %a, %b |
175 | 152 | ret <2 x half> %fadd |
176 | 153 | } |
177 | 154 |
|
; fadd of two `<2 x half>` operands that are NOT marked `inreg` (presumably "div" =
; divergent; the checks show the operands in v0/v1). This change switches the function
; to the amdgpu_ps calling convention.
; Expected code per the checks below: one v_pk_add_f16 v0, v0, v1, now checked under
; the shared GCN prefix instead of separate GFX11 and GFX12 blocks.
178 | | -define <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) { |
179 | | -; GFX11-LABEL: fadd_v2s16_div: |
180 | | -; GFX11: ; %bb.0: |
181 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
182 | | -; GFX11-NEXT: v_pk_add_f16 v0, v0, v1 |
183 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
184 | | -; |
185 | | -; GFX12-LABEL: fadd_v2s16_div: |
186 | | -; GFX12: ; %bb.0: |
187 | | -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
188 | | -; GFX12-NEXT: s_wait_expcnt 0x0 |
189 | | -; GFX12-NEXT: s_wait_samplecnt 0x0 |
190 | | -; GFX12-NEXT: s_wait_bvhcnt 0x0 |
191 | | -; GFX12-NEXT: s_wait_kmcnt 0x0 |
192 | | -; GFX12-NEXT: v_pk_add_f16 v0, v0, v1 |
193 | | -; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 155 | +define amdgpu_ps <2 x half> @fadd_v2s16_div(<2 x half> %a, <2 x half> %b) { |
| 156 | +; GCN-LABEL: fadd_v2s16_div: |
| 157 | +; GCN: ; %bb.0: |
| 158 | +; GCN-NEXT: v_pk_add_f16 v0, v0, v1 |
| 159 | +; GCN-NEXT: ; return to shader part epilog |
194 | 160 | %fadd = fadd <2 x half> %a, %b |
195 | 161 | ret <2 x half> %fadd |
196 | 162 | } |
197 | 163 |
|
; fadd of two `<2 x float>` operands that are both marked `inreg`; this change switches
; the function to the amdgpu_ps calling convention.
; Expected code per the checks below: GFX11 issues two v_add_f32_e64 (s0+s2, s1+s3);
; GFX12 issues two s_add_f32 and copies both results with one v_dual_mov_b32.
198 | | -define <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) { |
| 164 | +define amdgpu_ps <2 x float> @fadd_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) { |
199 | 165 | ; GFX11-LABEL: fadd_v2s32_uniform: |
200 | 166 | ; GFX11: ; %bb.0: |
201 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
202 | 167 | ; GFX11-NEXT: v_add_f32_e64 v0, s0, s2 |
203 | 168 | ; GFX11-NEXT: v_add_f32_e64 v1, s1, s3 |
204 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 169 | +; GFX11-NEXT: ; return to shader part epilog |
205 | 170 | ; |
206 | 171 | ; GFX12-LABEL: fadd_v2s32_uniform: |
207 | 172 | ; GFX12: ; %bb.0: |
208 | | -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
209 | | -; GFX12-NEXT: s_wait_expcnt 0x0 |
210 | | -; GFX12-NEXT: s_wait_samplecnt 0x0 |
211 | | -; GFX12-NEXT: s_wait_bvhcnt 0x0 |
212 | | -; GFX12-NEXT: s_wait_kmcnt 0x0 |
213 | 173 | ; GFX12-NEXT: s_add_f32 s0, s0, s2 |
214 | 174 | ; GFX12-NEXT: s_add_f32 s1, s1, s3 |
215 | | -; GFX12-NEXT: s_wait_alu 0xfffe |
216 | | -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) |
| 175 | +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) |
217 | 176 | ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
218 | | -; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 177 | +; GFX12-NEXT: ; return to shader part epilog |
219 | 178 | %fadd = fadd <2 x float> %a, %b |
220 | 179 | ret <2 x float> %fadd |
221 | 180 | } |
222 | 181 |
|
; fadd of two `<2 x float>` operands that are NOT marked `inreg` (presumably "div" =
; divergent; the checks show the operands in v0..v3). This change switches the function
; to the amdgpu_ps calling convention.
; Expected code per the checks below: one v_dual_add_f32 pair (v0+v2, v1+v3), now
; checked under the shared GCN prefix instead of separate GFX11 and GFX12 blocks.
223 | | -define <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) { |
224 | | -; GFX11-LABEL: fadd_v2s32_div: |
225 | | -; GFX11: ; %bb.0: |
226 | | -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
227 | | -; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 |
228 | | -; GFX11-NEXT: s_setpc_b64 s[30:31] |
229 | | -; |
230 | | -; GFX12-LABEL: fadd_v2s32_div: |
231 | | -; GFX12: ; %bb.0: |
232 | | -; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
233 | | -; GFX12-NEXT: s_wait_expcnt 0x0 |
234 | | -; GFX12-NEXT: s_wait_samplecnt 0x0 |
235 | | -; GFX12-NEXT: s_wait_bvhcnt 0x0 |
236 | | -; GFX12-NEXT: s_wait_kmcnt 0x0 |
237 | | -; GFX12-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 |
238 | | -; GFX12-NEXT: s_setpc_b64 s[30:31] |
| 182 | +define amdgpu_ps <2 x float> @fadd_v2s32_div(<2 x float> %a, <2 x float> %b) { |
| 183 | +; GCN-LABEL: fadd_v2s32_div: |
| 184 | +; GCN: ; %bb.0: |
| 185 | +; GCN-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 |
| 186 | +; GCN-NEXT: ; return to shader part epilog |
239 | 187 | %fadd = fadd <2 x float> %a, %b |
240 | 188 | ret <2 x float> %fadd |
241 | 189 | } |
242 | | -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
243 | | -; GFX11-GISEL: {{.*}} |
244 | | -; GFX11-SDAG: {{.*}} |
245 | | -; GFX12-GISEL: {{.*}} |
246 | | -; GFX12-SDAG: {{.*}} |
0 commit comments