1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
+ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
2
3
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
3
4
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4
5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5
6
6
7
define i8 @atomic_load_global_monotonic_i8 (ptr addrspace (1 ) %ptr ) {
8
+ ; GFX6-LABEL: atomic_load_global_monotonic_i8:
9
+ ; GFX6: ; %bb.0:
10
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11
+ ; GFX6-NEXT: s_mov_b32 s6, 0
12
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
13
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
14
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
15
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
16
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
17
+ ;
7
18
; GFX7-LABEL: atomic_load_global_monotonic_i8:
8
19
; GFX7: ; %bb.0:
9
20
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,6 +40,16 @@ define i8 @atomic_load_global_monotonic_i8(ptr addrspace(1) %ptr) {
29
40
}
30
41
31
42
define i32 @atomic_load_global_monotonic_i8_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
43
+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
44
+ ; GFX6: ; %bb.0:
45
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46
+ ; GFX6-NEXT: s_mov_b32 s6, 0
47
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
48
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
49
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
50
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
51
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
52
+ ;
32
53
; GFX7-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
33
54
; GFX7: ; %bb.0:
34
55
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -55,6 +76,18 @@ define i32 @atomic_load_global_monotonic_i8_zext_to_i32(ptr addrspace(1) %ptr) {
55
76
}
56
77
57
78
define i32 @atomic_load_global_monotonic_i8_sext_to_i32 (ptr addrspace (1 ) %ptr ) {
79
+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
80
+ ; GFX6: ; %bb.0:
81
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82
+ ; GFX6-NEXT: s_mov_b32 s6, 0
83
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
84
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
85
+ ; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
86
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
87
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
88
+ ; GFX6-NEXT: v_mov_b32_e32 v0, v2
89
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
90
+ ;
58
91
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
59
92
; GFX7: ; %bb.0:
60
93
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -88,6 +121,16 @@ define i32 @atomic_load_global_monotonic_i8_sext_to_i32(ptr addrspace(1) %ptr) {
88
121
}
89
122
90
123
define i16 @atomic_load_global_monotonic_i8_zext_to_i16 (ptr addrspace (1 ) %ptr ) {
124
+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
125
+ ; GFX6: ; %bb.0:
126
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127
+ ; GFX6-NEXT: s_mov_b32 s6, 0
128
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
129
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
130
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
131
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
132
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
133
+ ;
91
134
; GFX7-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
92
135
; GFX7: ; %bb.0:
93
136
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -114,6 +157,18 @@ define i16 @atomic_load_global_monotonic_i8_zext_to_i16(ptr addrspace(1) %ptr) {
114
157
}
115
158
116
159
define i16 @atomic_load_global_monotonic_i8_sext_to_i16 (ptr addrspace (1 ) %ptr ) {
160
+ ; GFX6-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
161
+ ; GFX6: ; %bb.0:
162
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163
+ ; GFX6-NEXT: s_mov_b32 s6, 0
164
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
165
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
166
+ ; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
167
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
168
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
169
+ ; GFX6-NEXT: v_mov_b32_e32 v0, v2
170
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
171
+ ;
117
172
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
118
173
; GFX7: ; %bb.0:
119
174
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -147,6 +202,16 @@ define i16 @atomic_load_global_monotonic_i8_sext_to_i16(ptr addrspace(1) %ptr) {
147
202
}
148
203
149
204
define i16 @atomic_load_global_monotonic_i16 (ptr addrspace (1 ) %ptr ) {
205
+ ; GFX6-LABEL: atomic_load_global_monotonic_i16:
206
+ ; GFX6: ; %bb.0:
207
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208
+ ; GFX6-NEXT: s_mov_b32 s6, 0
209
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
210
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
211
+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
212
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
213
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
214
+ ;
150
215
; GFX7-LABEL: atomic_load_global_monotonic_i16:
151
216
; GFX7: ; %bb.0:
152
217
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -172,6 +237,16 @@ define i16 @atomic_load_global_monotonic_i16(ptr addrspace(1) %ptr) {
172
237
}
173
238
174
239
define i32 @atomic_load_global_monotonic_i16_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
240
+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
241
+ ; GFX6: ; %bb.0:
242
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
243
+ ; GFX6-NEXT: s_mov_b32 s6, 0
244
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
245
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
246
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
247
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
248
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
249
+ ;
175
250
; GFX7-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
176
251
; GFX7: ; %bb.0:
177
252
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,6 +273,18 @@ define i32 @atomic_load_global_monotonic_i16_zext_to_i32(ptr addrspace(1) %ptr)
198
273
}
199
274
200
275
define i32 @atomic_load_global_monotonic_i16_sext_to_i32 (ptr addrspace (1 ) %ptr ) {
276
+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
277
+ ; GFX6: ; %bb.0:
278
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279
+ ; GFX6-NEXT: s_mov_b32 s6, 0
280
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
281
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
282
+ ; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
283
+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
284
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
285
+ ; GFX6-NEXT: v_mov_b32_e32 v0, v2
286
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
287
+ ;
201
288
; GFX7-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
202
289
; GFX7: ; %bb.0:
203
290
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -231,6 +318,16 @@ define i32 @atomic_load_global_monotonic_i16_sext_to_i32(ptr addrspace(1) %ptr)
231
318
}
232
319
233
320
define half @atomic_load_global_monotonic_f16 (ptr addrspace (1 ) %ptr ) {
321
+ ; GFX6-LABEL: atomic_load_global_monotonic_f16:
322
+ ; GFX6: ; %bb.0:
323
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324
+ ; GFX6-NEXT: s_mov_b32 s6, 0
325
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
326
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
327
+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
328
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
329
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
330
+ ;
234
331
; GFX7-LABEL: atomic_load_global_monotonic_f16:
235
332
; GFX7: ; %bb.0:
236
333
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -256,6 +353,16 @@ define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) {
256
353
}
257
354
258
355
define bfloat @atomic_load_global_monotonic_bf16 (ptr addrspace (1 ) %ptr ) {
356
+ ; GFX6-LABEL: atomic_load_global_monotonic_bf16:
357
+ ; GFX6: ; %bb.0:
358
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359
+ ; GFX6-NEXT: s_mov_b32 s6, 0
360
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
361
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
362
+ ; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
363
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
364
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
365
+ ;
259
366
; GFX7-LABEL: atomic_load_global_monotonic_bf16:
260
367
; GFX7: ; %bb.0:
261
368
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -281,6 +388,16 @@ define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) {
281
388
}
282
389
283
390
define i32 @atomic_load_global_monotonic_f16_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
391
+ ; GFX6-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
392
+ ; GFX6: ; %bb.0:
393
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394
+ ; GFX6-NEXT: s_mov_b32 s6, 0
395
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
396
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
397
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
398
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
399
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
400
+ ;
284
401
; GFX7-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
285
402
; GFX7: ; %bb.0:
286
403
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -308,6 +425,16 @@ define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr)
308
425
}
309
426
310
427
define i32 @atomic_load_global_monotonic_bf16_zext_to_i32 (ptr addrspace (1 ) %ptr ) {
428
+ ; GFX6-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
429
+ ; GFX6: ; %bb.0:
430
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431
+ ; GFX6-NEXT: s_mov_b32 s6, 0
432
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
433
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
434
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
435
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
436
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
437
+ ;
311
438
; GFX7-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
312
439
; GFX7: ; %bb.0:
313
440
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -335,6 +462,17 @@ define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr)
335
462
}
336
463
337
464
define i32 @atomic_load_global_monotonic_i16_d16_hi_shift (ptr addrspace (1 ) %ptr ) {
465
+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
466
+ ; GFX6: ; %bb.0:
467
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468
+ ; GFX6-NEXT: s_mov_b32 s6, 0
469
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
470
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
471
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
472
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
473
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
474
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
475
+ ;
338
476
; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
339
477
; GFX7: ; %bb.0:
340
478
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -365,6 +503,23 @@ define i32 @atomic_load_global_monotonic_i16_d16_hi_shift(ptr addrspace(1) %ptr)
365
503
}
366
504
367
505
define <2 x i16 > @atomic_load_global_monotonic_i16_d16_hi_vector_insert (ptr addrspace (1 ) %ptr , <2 x i16 > %vec ) {
506
+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
507
+ ; GFX6: ; %bb.0:
508
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509
+ ; GFX6-NEXT: s_mov_b32 s6, 0
510
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
511
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
512
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
513
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
514
+ ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
515
+ ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
516
+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
517
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
518
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
519
+ ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
520
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
521
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
522
+ ;
368
523
; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
369
524
; GFX7: ; %bb.0:
370
525
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -403,6 +558,19 @@ define <2 x i16> @atomic_load_global_monotonic_i16_d16_hi_vector_insert(ptr addr
403
558
}
404
559
405
560
define i32 @atomic_load_global_monotonic_i16_d16_lo_or (ptr addrspace (1 ) %ptr , i16 %high ) {
561
+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
562
+ ; GFX6: ; %bb.0:
563
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564
+ ; GFX6-NEXT: s_mov_b32 s6, 0
565
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
566
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
567
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
568
+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
569
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
570
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
571
+ ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
572
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
573
+ ;
406
574
; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
407
575
; GFX7: ; %bb.0:
408
576
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -440,6 +608,22 @@ define i32 @atomic_load_global_monotonic_i16_d16_lo_or(ptr addrspace(1) %ptr, i1
440
608
}
441
609
442
610
define <2 x i16 > @atomic_load_global_monotonic_i16_d16_lo_vector_insert (ptr addrspace (1 ) %ptr , <2 x i16 > %vec ) {
611
+ ; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert:
612
+ ; GFX6: ; %bb.0:
613
+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614
+ ; GFX6-NEXT: s_mov_b32 s6, 0
615
+ ; GFX6-NEXT: s_mov_b32 s7, 0x100f000
616
+ ; GFX6-NEXT: s_mov_b64 s[4:5], 0
617
+ ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
618
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
619
+ ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
620
+ ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
621
+ ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
622
+ ; GFX6-NEXT: s_waitcnt vmcnt(0)
623
+ ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
624
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
625
+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
626
+ ;
443
627
; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert:
444
628
; GFX7: ; %bb.0:
445
629
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
0 commit comments