Skip to content

Commit bf79156

Browse files
authored
[AMDGPU] Add test for readfirstlane with i1 type (llvm#109657)
Add tests for readfirstlane with the i1 type to demonstrate that the lowering works. Also simplify the existing tests a bit: the explicit intrinsic declarations are no longer strictly needed.
1 parent 09e94d0 commit bf79156

File tree

1 file changed

+124
-20
lines changed

1 file changed

+124
-20
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll

Lines changed: 124 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,118 @@
22
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
33
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s
44

5-
declare i32 @llvm.amdgcn.readfirstlane(i32) #0
6-
declare i64 @llvm.amdgcn.readfirstlane.i64(i64) #0
7-
declare double @llvm.amdgcn.readfirstlane.f64(double) #0
5+
define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) { ; readfirstlane applied to a plain i1 argument; the result is stored to %out
6+
; CHECK-SDAG-LABEL: test_readfirstlane_i1:
7+
; CHECK-SDAG: ; %bb.0:
8+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
10+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
11+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
12+
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
13+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
14+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
15+
;
16+
; CHECK-GISEL-LABEL: test_readfirstlane_i1:
17+
; CHECK-GISEL: ; %bb.0:
18+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19+
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
20+
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
21+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
22+
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
23+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
24+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
25+
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src) ; i1 overload under test; both check blocks above expect v_readfirstlane_b32 followed by s_and_b32 with 1
26+
store i1 %readfirstlane, ptr addrspace(1) %out, align 4 ; both check blocks above expect this i1 store to appear as flat_store_byte
27+
ret void
28+
}
29+
30+
define void @test_readfirstlane_i1_inreg(ptr addrspace(1) %out, i1 inreg %src) { ; readfirstlane applied to an i1 argument marked inreg; the result is stored to %out
31+
; CHECK-SDAG-LABEL: test_readfirstlane_i1_inreg:
32+
; CHECK-SDAG: ; %bb.0:
33+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34+
; CHECK-SDAG-NEXT: s_and_b32 s4, s6, 1
35+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
36+
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
37+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
38+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
39+
;
40+
; CHECK-GISEL-LABEL: test_readfirstlane_i1_inreg:
41+
; CHECK-GISEL: ; %bb.0:
42+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43+
; CHECK-GISEL-NEXT: s_and_b32 s4, s6, 1
44+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
45+
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
46+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
47+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
48+
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src) ; with the inreg source, neither check block above contains a v_readfirstlane_b32; only s_and_b32 on s6 is expected
49+
store i1 %readfirstlane, ptr addrspace(1) %out, align 4 ; both check blocks above expect this i1 store to appear as flat_store_byte
50+
ret void
51+
}
52+
53+
define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) { ; readfirstlane applied to an i1 compare result that then drives a select between %src and %src1
54+
; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
55+
; CHECK-SDAG: ; %bb.0:
56+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57+
; CHECK-SDAG-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
58+
; CHECK-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
59+
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v4
60+
; CHECK-SDAG-NEXT: s_bitcmp1_b32 s4, 0
61+
; CHECK-SDAG-NEXT: s_cselect_b64 vcc, -1, 0
62+
; CHECK-SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
63+
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
64+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
65+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
66+
;
67+
; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
68+
; CHECK-GISEL: ; %bb.0:
69+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70+
; CHECK-GISEL-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
71+
; CHECK-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
72+
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v4
73+
; CHECK-GISEL-NEXT: s_and_b32 s4, 1, s4
74+
; CHECK-GISEL-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
75+
; CHECK-GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
76+
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
77+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
78+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
79+
%cmp = icmp ugt i32 %src, 42 ; produces the i1 source; both check blocks above expect v_cmp_lt_u32 with 42 and a v_cndmask_b32 that turns the condition into 0/1
80+
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp) ; i1 overload under test; both check blocks above expect v_readfirstlane_b32 on that 0/1 value
81+
%sel = select i1 %readfirstlane, i32 %src, i32 %src1 ; the expected sequences differ here: s_bitcmp1_b32 + s_cselect_b64 for the run without -global-isel, s_and_b32 + v_cmp_ne_u32 for the run with it
82+
store i32 %sel, ptr addrspace(1) %out, align 4 ; both check blocks above expect flat_store_dword for the selected i32
83+
ret void
84+
}
885

9-
define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) #1 {
86+
define void @test_readfirstlane_i1_load(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; readfirstlane applied to an i1 loaded from %in; the result is stored to %out
87+
; CHECK-SDAG-LABEL: test_readfirstlane_i1_load:
88+
; CHECK-SDAG: ; %bb.0:
89+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90+
; CHECK-SDAG-NEXT: flat_load_ubyte v2, v[2:3]
91+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
92+
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
93+
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
94+
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
95+
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
96+
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
97+
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
98+
;
99+
; CHECK-GISEL-LABEL: test_readfirstlane_i1_load:
100+
; CHECK-GISEL: ; %bb.0:
101+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; CHECK-GISEL-NEXT: flat_load_ubyte v2, v[2:3]
103+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
104+
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
105+
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
106+
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
107+
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
108+
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
109+
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
110+
%load = load i1, ptr addrspace(1) %in ; both check blocks above expect this i1 load to appear as flat_load_ubyte
111+
%readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %load) ; i1 overload under test; both check blocks above expect v_readfirstlane_b32 followed by s_and_b32 with 1
112+
store i1 %readfirstlane, ptr addrspace(1) %out, align 4 ; both check blocks above expect this i1 store to appear as flat_store_byte
113+
ret void
114+
}
115+
116+
define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
10117
; CHECK-SDAG-LABEL: test_readfirstlane_i32:
11118
; CHECK-SDAG: ; %bb.0:
12119
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +136,7 @@ define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) #1 {
29136
ret void
30137
}
31138

32-
define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) #1 {
139+
define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) {
33140
; CHECK-SDAG-LABEL: test_readfirstlane_i64:
34141
; CHECK-SDAG: ; %bb.0:
35142
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -56,7 +163,7 @@ define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) #1 {
56163
ret void
57164
}
58165

59-
define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) #1 {
166+
define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) {
60167
; CHECK-SDAG-LABEL: test_readfirstlane_f64:
61168
; CHECK-SDAG: ; %bb.0:
62169
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -83,7 +190,7 @@ define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) #1 {
83190
ret void
84191
}
85192

86-
define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) #1 {
193+
define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) {
87194
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
88195
; CHECK-SDAG: ; %bb.0:
89196
; CHECK-SDAG-NEXT: s_mov_b32 s0, 32
@@ -104,7 +211,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) #1
104211
ret void
105212
}
106213

107-
define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) #1 {
214+
define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) {
108215
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
109216
; CHECK-SDAG: ; %bb.0:
110217
; CHECK-SDAG-NEXT: s_mov_b64 s[0:1], 32
@@ -125,7 +232,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) #1
125232
ret void
126233
}
127234

128-
define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) #1 {
235+
define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
129236
; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
130237
; CHECK-SDAG: ; %bb.0:
131238
; CHECK-SDAG-NEXT: s_mov_b32 s0, 0
@@ -148,7 +255,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) #1
148255
ret void
149256
}
150257

151-
define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) #1 {
258+
define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) {
152259
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
153260
; CHECK-SDAG: ; %bb.0:
154261
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -173,7 +280,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out
173280
ret void
174281
}
175282

176-
define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) #1 {
283+
define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) {
177284
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
178285
; CHECK-SDAG: ; %bb.0:
179286
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -201,7 +308,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out
201308
ret void
202309
}
203310

204-
define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) #1 {
311+
define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) {
205312
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
206313
; CHECK-SDAG: ; %bb.0:
207314
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -230,7 +337,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out
230337
ret void
231338
}
232339

233-
define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
340+
define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) {
234341
; CHECK-SDAG-LABEL: test_readfirstlane_m0:
235342
; CHECK-SDAG: ; %bb.0:
236343
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -262,7 +369,7 @@ define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
262369
ret void
263370
}
264371

265-
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) #1 {
372+
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) {
266373
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
267374
; CHECK-SDAG: ; %bb.0:
268375
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -294,7 +401,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1
294401
ret void
295402
}
296403

297-
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) #1 {
404+
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) {
298405
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
299406
; CHECK-SDAG: ; %bb.0:
300407
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -328,7 +435,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1
328435
ret void
329436
}
330437

331-
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) #1 {
438+
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) {
332439
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
333440
; CHECK-SDAG: ; %bb.0:
334441
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -362,7 +469,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1
362469
ret void
363470
}
364471

365-
define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) #1 {
472+
define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) {
366473
; CHECK-SDAG-LABEL: test_readfirstlane_fi:
367474
; CHECK-SDAG: ; %bb.0:
368475
; CHECK-SDAG-NEXT: s_add_u32 s0, s0, s15
@@ -593,6 +700,3 @@ define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
593700
call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
594701
ret void
595702
}
596-
597-
attributes #0 = { nounwind readnone convergent }
598-
attributes #1 = { nounwind }

0 commit comments

Comments
 (0)