1
- ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
1
+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
2
+ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
2
3
3
4
declare void @llvm.amdgcn.tbuffer.store.i32 (i32 , <4 x i32 >, i32 , i32 , i32 , i32 , i32 , i32 , i1 , i1 )
4
5
declare void @llvm.amdgcn.tbuffer.store.v4i32 (<4 x i32 >, <4 x i32 >, i32 , i32 , i32 , i32 , i32 , i32 , i1 , i1 )
@@ -10,9 +11,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
10
11
@stored_constant_ptr = addrspace (3 ) global i32 addrspace (2 )* undef , align 8
11
12
@stored_global_ptr = addrspace (3 ) global i32 addrspace (1 )* undef , align 8
12
13
13
- ; FUNC-LABEL: @reorder_local_load_global_store_local_load
14
+ ; GCN-LABEL: {{^}}reorder_local_load_global_store_local_load:
14
15
; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:3
15
16
; CI: buffer_store_dword
17
+
18
+ ; GFX9: global_store_dword
19
+ ; GFX9: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:1 offset1:3
20
+ ; GFX9: global_store_dword
16
21
define amdgpu_kernel void @reorder_local_load_global_store_local_load (i32 addrspace (1 )* %out , i32 addrspace (1 )* %gptr ) #0 {
17
22
%ptr0 = load i32 addrspace (3 )*, i32 addrspace (3 )* addrspace (3 )* @stored_lds_ptr , align 4
18
23
@@ -29,10 +34,14 @@ define amdgpu_kernel void @reorder_local_load_global_store_local_load(i32 addrsp
29
34
ret void
30
35
}
31
36
32
- ; FUNC-LABEL: @no_reorder_local_load_volatile_global_store_local_load
37
+ ; GCN-LABEL: {{^}}no_reorder_local_load_volatile_global_store_local_load:
33
38
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
34
39
; CI: buffer_store_dword
35
40
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
41
+
42
+ ; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
43
+ ; GFX9: global_store_dword
44
+ ; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
36
45
define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_load (i32 addrspace (1 )* %out , i32 addrspace (1 )* %gptr ) #0 {
37
46
%ptr0 = load i32 addrspace (3 )*, i32 addrspace (3 )* addrspace (3 )* @stored_lds_ptr , align 4
38
47
@@ -49,10 +58,16 @@ define amdgpu_kernel void @no_reorder_local_load_volatile_global_store_local_loa
49
58
ret void
50
59
}
51
60
52
- ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
61
+ ; GCN-LABEL: {{^}}no_reorder_barrier_local_load_global_store_local_load:
53
62
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
54
63
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
55
64
; CI: buffer_store_dword
65
+
66
+ ; GFX9: global_store_dword
67
+ ; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
68
+ ; GFX9: s_barrier
69
+ ; GFX9: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
70
+ ; GFX9: global_store_dword
56
71
define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load (i32 addrspace (1 )* %out , i32 addrspace (1 )* %gptr ) #0 {
57
72
%ptr0 = load i32 addrspace (3 )*, i32 addrspace (3 )* addrspace (3 )* @stored_lds_ptr , align 4
58
73
@@ -70,13 +85,20 @@ define amdgpu_kernel void @no_reorder_barrier_local_load_global_store_local_load
70
85
ret void
71
86
}
72
87
73
- ; FUNC-LABEL: @reorder_constant_load_global_store_constant_load
74
- ; CI-DAG: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
75
- ; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
88
+ ; GCN-LABEL: {{^}}reorder_constant_load_global_store_constant_load:
89
+ ; GCN-DAG: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
90
+ ; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
76
91
; CI: buffer_store_dword
92
+
77
93
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1
78
94
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3
95
+
96
+ ; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x4
97
+ ; GFX9: global_store_dword
98
+ ; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xc
99
+
79
100
; CI: buffer_store_dword
101
+ ; GFX9: global_store_dword
80
102
define amdgpu_kernel void @reorder_constant_load_global_store_constant_load (i32 addrspace (1 )* %out , i32 addrspace (1 )* %gptr ) #0 {
81
103
%ptr0 = load i32 addrspace (2 )*, i32 addrspace (2 )* addrspace (3 )* @stored_constant_ptr , align 8
82
104
@@ -93,13 +115,19 @@ define amdgpu_kernel void @reorder_constant_load_global_store_constant_load(i32
93
115
ret void
94
116
}
95
117
96
- ; FUNC-LABEL: @reorder_constant_load_local_store_constant_load
97
- ; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
98
- ; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
118
+ ; GCN-LABEL: {{^}}reorder_constant_load_local_store_constant_load:
119
+ ; GCN: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}}
120
+ ; GCN: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}}
121
+
99
122
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1
100
123
; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3
101
- ; CI: ds_write_b32
124
+
125
+ ; GFX9-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x4
126
+ ; GFX9-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0xc
127
+
128
+ ; GCN: ds_write_b32
102
129
; CI: buffer_store_dword
130
+ ; GFX9: global_store_dword
103
131
define amdgpu_kernel void @reorder_constant_load_local_store_constant_load (i32 addrspace (1 )* %out , i32 addrspace (3 )* %lptr ) #0 {
104
132
%ptr0 = load i32 addrspace (2 )*, i32 addrspace (2 )* addrspace (3 )* @stored_constant_ptr , align 8
105
133
@@ -116,12 +144,13 @@ define amdgpu_kernel void @reorder_constant_load_local_store_constant_load(i32 a
116
144
ret void
117
145
}
118
146
119
- ; FUNC-LABEL: @reorder_smrd_load_local_store_smrd_load
120
- ; CI: s_load_dword
121
- ; CI: s_load_dword
122
- ; CI: s_load_dword
123
- ; CI: ds_write_b32
147
+ ; GCN-LABEL: {{^}}reorder_smrd_load_local_store_smrd_load:
148
+ ; GCN: s_load_dword
149
+ ; GCN: s_load_dword
150
+ ; GCN: s_load_dword
151
+ ; GCN: ds_write_b32
124
152
; CI: buffer_store_dword
153
+ ; GFX9: global_store_dword
125
154
define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load (i32 addrspace (1 )* %out , i32 addrspace (3 )* noalias %lptr , i32 addrspace (2 )* %ptr0 ) #0 {
126
155
%ptr1 = getelementptr inbounds i32 , i32 addrspace (2 )* %ptr0 , i64 1
127
156
%ptr2 = getelementptr inbounds i32 , i32 addrspace (2 )* %ptr0 , i64 2
@@ -136,11 +165,15 @@ define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace
136
165
ret void
137
166
}
138
167
139
- ; FUNC-LABEL: @reorder_global_load_local_store_global_load
168
+ ; GCN-LABEL: {{^}}reorder_global_load_local_store_global_load:
140
169
; CI: ds_write_b32
141
170
; CI: buffer_load_dword
142
171
; CI: buffer_load_dword
143
172
; CI: buffer_store_dword
173
+
174
+ ; GFX9: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4
175
+ ; GFX9: global_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:12
176
+ ; GFX9: ds_write_b32
144
177
define amdgpu_kernel void @reorder_global_load_local_store_global_load (i32 addrspace (1 )* %out , i32 addrspace (3 )* %lptr , i32 addrspace (1 )* %ptr0 ) #0 {
145
178
%ptr1 = getelementptr inbounds i32 , i32 addrspace (1 )* %ptr0 , i64 1
146
179
%ptr2 = getelementptr inbounds i32 , i32 addrspace (1 )* %ptr0 , i64 3
@@ -155,12 +188,13 @@ define amdgpu_kernel void @reorder_global_load_local_store_global_load(i32 addrs
155
188
ret void
156
189
}
157
190
158
- ; FUNC-LABEL: @reorder_local_offsets
159
- ; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102
160
- ; CI-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100
161
- ; CI-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
191
+ ; GCN-LABEL: {{^}}reorder_local_offsets:
192
+ ; GCN: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102
193
+ ; GCN-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100
194
+ ; GCN-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
162
195
; CI: buffer_store_dword
163
- ; CI: s_endpgm
196
+ ; GFX9: global_store_dword
197
+ ; GCN: s_endpgm
164
198
define amdgpu_kernel void @reorder_local_offsets (i32 addrspace (1 )* nocapture %out , i32 addrspace (1 )* noalias nocapture readnone %gptr , i32 addrspace (3 )* noalias nocapture %ptr0 ) #0 {
165
199
%ptr1 = getelementptr inbounds i32 , i32 addrspace (3 )* %ptr0 , i32 3
166
200
%ptr2 = getelementptr inbounds i32 , i32 addrspace (3 )* %ptr0 , i32 100
@@ -179,14 +213,22 @@ define amdgpu_kernel void @reorder_local_offsets(i32 addrspace(1)* nocapture %ou
179
213
ret void
180
214
}
181
215
182
- ; FUNC-LABEL: @reorder_global_offsets
216
+ ; GCN-LABEL: {{^}}reorder_global_offsets:
183
217
; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
184
218
; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
185
219
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
186
220
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
187
221
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
188
222
; CI: buffer_store_dword
189
223
; CI: s_endpgm
224
+
225
+ ; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:400
226
+ ; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:408
227
+ ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, off offset:12
228
+ ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, off offset:400
229
+ ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, off offset:408
230
+ ; GFX9: global_store_dword
231
+ ; GFX9: s_endpgm
190
232
define amdgpu_kernel void @reorder_global_offsets (i32 addrspace (1 )* nocapture %out , i32 addrspace (1 )* noalias nocapture readnone %gptr , i32 addrspace (1 )* noalias nocapture %ptr0 ) #0 {
191
233
%ptr1 = getelementptr inbounds i32 , i32 addrspace (1 )* %ptr0 , i32 3
192
234
%ptr2 = getelementptr inbounds i32 , i32 addrspace (1 )* %ptr0 , i32 100
@@ -205,22 +247,33 @@ define amdgpu_kernel void @reorder_global_offsets(i32 addrspace(1)* nocapture %o
205
247
ret void
206
248
}
207
249
208
- ; FUNC-LABEL: {{^}}reorder_global_offsets_addr64_soffset0:
209
- ; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:12{{$}}
210
- ; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:28{{$}}
211
- ; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:44{{$}}
250
+ ; GCN-LABEL: {{^}}reorder_global_offsets_addr64_soffset0:
251
+ ; CI: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
252
+ ; CI-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:28{{$}}
253
+ ; CI-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:44{{$}}
254
+
255
+ ; CI: v_mov_b32
256
+ ; CI: v_mov_b32
257
+
258
+ ; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
259
+ ; CI-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
260
+
261
+ ; CI: v_add_i32
262
+ ; CI: v_add_i32
263
+
264
+ ; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:36{{$}}
265
+ ; CI-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:52{{$}}
212
266
213
- ; GCN: v_mov_b32
214
- ; GCN: v_mov_b32
215
267
216
- ; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64{{$}}
217
- ; GCN-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:20{{$}}
268
+ ; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:12
269
+ ; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:28
270
+ ; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:44
218
271
219
- ; GCN: v_add_i32
220
- ; GCN: v_add_i32
272
+ ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off{{$}}
273
+ ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:20
221
274
222
- ; GCN: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:36{{$}}
223
- ; GCN-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} 0 addr64 offset:52{{$}}
275
+ ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:36
276
+ ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:52
224
277
define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0 (i32 addrspace (1 )* noalias nocapture %ptr.base ) #0 {
225
278
%id = call i32 @llvm.amdgcn.workitem.id.x ()
226
279
%id.ext = sext i32 %id to i64
@@ -245,7 +298,7 @@ define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(i32 addrspace(
245
298
ret void
246
299
}
247
300
248
- ; XFUNC-LABEL: @reorder_local_load_tbuffer_store_local_load
301
+ ; XGCN-LABEL: {{^}}reorder_local_load_tbuffer_store_local_load:
249
302
; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x4
250
303
; XCI: TBUFFER_STORE_FORMAT
251
304
; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
0 commit comments