1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2
- ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes =HSA,ATTRIBUTOR_HSA %s
2
+ ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefix =HSA %s
3
3
4
4
declare void @llvm.memcpy.p1.p4.i32 (ptr addrspace (1 ) nocapture , ptr addrspace (4 ) nocapture , i32 , i1 ) #0
5
5
@@ -26,30 +26,30 @@ define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
26
26
}
27
27
28
28
define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast () #1 {
29
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
30
- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2:[0-9]+]] {
31
- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
32
- ; ATTRIBUTOR_HSA -NEXT: ret void
29
+ ; HSA -LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
30
+ ; HSA -SAME: () #[[ATTR2:[0-9]+]] {
31
+ ; HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
32
+ ; HSA -NEXT: ret void
33
33
;
34
34
store i32 7 , ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) null to ptr addrspace (4 ))
35
35
ret void
36
36
}
37
37
38
38
define amdgpu_kernel void @store_constant_cast_group_gv_to_flat () #1 {
39
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
40
- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
41
- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
42
- ; ATTRIBUTOR_HSA -NEXT: ret void
39
+ ; HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
40
+ ; HSA -SAME: () #[[ATTR2]] {
41
+ ; HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
42
+ ; HSA -NEXT: ret void
43
43
;
44
44
store i32 7 , ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.i32 to ptr addrspace (4 ))
45
45
ret void
46
46
}
47
47
48
48
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat () #1 {
49
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
50
- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
51
- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
52
- ; ATTRIBUTOR_HSA -NEXT: ret void
49
+ ; HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
50
+ ; HSA -SAME: () #[[ATTR2]] {
51
+ ; HSA -NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
52
+ ; HSA -NEXT: ret void
53
53
;
54
54
store i32 7 , ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 )
55
55
ret void
@@ -76,36 +76,36 @@ define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
76
76
}
77
77
78
78
define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
79
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
80
- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
81
- ; ATTRIBUTOR_HSA -NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
82
- ; ATTRIBUTOR_HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
83
- ; ATTRIBUTOR_HSA -NEXT: ret void
79
+ ; HSA -LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
80
+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
81
+ ; HSA -NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
82
+ ; HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
83
+ ; HSA -NEXT: ret void
84
84
;
85
85
%val = load i32 , ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 )
86
86
store i32 %val , ptr addrspace (1 ) %out
87
87
ret void
88
88
}
89
89
90
90
define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
91
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
92
- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
93
- ; ATTRIBUTOR_HSA -NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
94
- ; ATTRIBUTOR_HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
95
- ; ATTRIBUTOR_HSA -NEXT: ret void
91
+ ; HSA -LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
92
+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
93
+ ; HSA -NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
94
+ ; HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
95
+ ; HSA -NEXT: ret void
96
96
;
97
97
%val = atomicrmw add ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), i32 1 seq_cst
98
98
store i32 %val , ptr addrspace (1 ) %out
99
99
ret void
100
100
}
101
101
102
102
define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
103
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
104
- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
105
- ; ATTRIBUTOR_HSA -NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
106
- ; ATTRIBUTOR_HSA -NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
107
- ; ATTRIBUTOR_HSA -NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
108
- ; ATTRIBUTOR_HSA -NEXT: ret void
103
+ ; HSA -LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
104
+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
105
+ ; HSA -NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
106
+ ; HSA -NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
107
+ ; HSA -NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
108
+ ; HSA -NEXT: ret void
109
109
;
110
110
%val = cmpxchg ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), i32 0 , i32 1 seq_cst seq_cst
111
111
%val0 = extractvalue { i32 , i1 } %val , 0
@@ -114,52 +114,52 @@ define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrsp
114
114
}
115
115
116
116
define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
117
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
118
- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
119
- ; ATTRIBUTOR_HSA -NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
120
- ; ATTRIBUTOR_HSA -NEXT: ret void
117
+ ; HSA -LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
118
+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
119
+ ; HSA -NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
120
+ ; HSA -NEXT: ret void
121
121
;
122
122
call void @llvm.memcpy.p1.p4.i32 (ptr addrspace (1 ) align 4 %out , ptr addrspace (4 ) align 4 getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), i32 32 , i1 false )
123
123
ret void
124
124
}
125
125
126
126
; Can't just search the pointer value
127
127
define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
128
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
129
- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
130
- ; ATTRIBUTOR_HSA -NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
131
- ; ATTRIBUTOR_HSA -NEXT: ret void
128
+ ; HSA -LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
129
+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
130
+ ; HSA -NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
131
+ ; HSA -NEXT: ret void
132
132
;
133
133
store ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), ptr addrspace (1 ) %out
134
134
ret void
135
135
}
136
136
137
137
; Can't just search pointer types
138
138
define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
139
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
140
- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
141
- ; ATTRIBUTOR_HSA -NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
142
- ; ATTRIBUTOR_HSA -NEXT: ret void
139
+ ; HSA -LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
140
+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
141
+ ; HSA -NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
142
+ ; HSA -NEXT: ret void
143
143
;
144
144
store i64 ptrtoint (ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ) to i64 ), ptr addrspace (1 ) %out
145
145
ret void
146
146
}
147
147
148
148
; Cast group to flat, do GEP, cast back to group
149
149
define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group () #1 {
150
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
151
- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
152
- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
153
- ; ATTRIBUTOR_HSA -NEXT: ret void
150
+ ; HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
151
+ ; HSA -SAME: () #[[ATTR2]] {
152
+ ; HSA -NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
153
+ ; HSA -NEXT: ret void
154
154
;
155
155
store i32 7 , ptr addrspace (3 ) addrspacecast (ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ) to ptr addrspace (3 ))
156
156
ret void
157
157
}
158
158
159
159
define ptr addrspace (3 ) @ret_constant_cast_group_gv_gep_to_flat_to_group () #1 {
160
- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
161
- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
162
- ; ATTRIBUTOR_HSA -NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
160
+ ; HSA -LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
161
+ ; HSA -SAME: () #[[ATTR2]] {
162
+ ; HSA -NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
163
163
;
164
164
ret ptr addrspace (3 ) addrspacecast (ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ) to ptr addrspace (3 ))
165
165
}
@@ -170,11 +170,9 @@ attributes #1 = { nounwind }
170
170
!llvm.module.flags = !{!0 }
171
171
!0 = !{i32 1 , !"amdhsa_code_object_version" , i32 500 }
172
172
;.
173
+ ; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
174
+ ; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
175
+ ; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
173
176
;.
174
- ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
175
- ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
176
- ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
177
- ;.
178
- ;.
179
- ; ATTRIBUTOR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
177
+ ; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
180
178
;.
0 commit comments