|
83 | 83 |
|
84 | 84 | // CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" { |
85 | 85 | // CUDA-NEXT: entry: |
86 | | -// CUDA-NEXT: br i1 icmp ne (ptr @__start_cuda_offloading_entries, ptr @__stop_cuda_offloading_entries), label %while.entry, label %while.end |
| 86 | +// CUDA-NEXT: %1 = icmp ne ptr @__start_cuda_offloading_entries, @__stop_cuda_offloading_entries |
| 87 | +// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end |
87 | 88 |
|
88 | 89 | // CUDA: while.entry: |
89 | | -// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %11, %if.end ] |
90 | | -// CUDA-NEXT: %1 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0 |
91 | | -// CUDA-NEXT: %addr = load ptr, ptr %1, align 8 |
92 | | -// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1 |
93 | | -// CUDA-NEXT: %name = load ptr, ptr %2, align 8 |
94 | | -// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2 |
95 | | -// CUDA-NEXT: %size = load i64, ptr %3, align 4 |
96 | | -// CUDA-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 3 |
97 | | -// CUDA-NEXT: %flags = load i32, ptr %4, align 4 |
98 | | -// CUDA-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 4 |
99 | | -// CUDA-NEXT: %textype = load i32, ptr %5, align 4 |
| 90 | +// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %12, %if.end ] |
| 91 | +// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0 |
| 92 | +// CUDA-NEXT: %addr = load ptr, ptr %2, align 8 |
| 93 | +// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1 |
| 94 | +// CUDA-NEXT: %name = load ptr, ptr %3, align 8 |
| 95 | +// CUDA-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2 |
| 96 | +// CUDA-NEXT: %size = load i64, ptr %4, align 4 |
| 97 | +// CUDA-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 3 |
| 98 | +// CUDA-NEXT: %flags = load i32, ptr %5, align 4 |
| 99 | +// CUDA-NEXT: %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 4 |
| 100 | +// CUDA-NEXT: %textype = load i32, ptr %6, align 4 |
100 | 101 | // CUDA-NEXT: %type = and i32 %flags, 7 |
101 | | -// CUDA-NEXT: %6 = and i32 %flags, 8 |
102 | | -// CUDA-NEXT: %extern = lshr i32 %6, 3 |
103 | | -// CUDA-NEXT: %7 = and i32 %flags, 16 |
104 | | -// CUDA-NEXT: %constant = lshr i32 %7, 4 |
105 | | -// CUDA-NEXT: %8 = and i32 %flags, 32 |
106 | | -// CUDA-NEXT: %normalized = lshr i32 %8, 5 |
107 | | -// CUDA-NEXT: %9 = icmp eq i64 %size, 0 |
108 | | -// CUDA-NEXT: br i1 %9, label %if.then, label %if.else |
| 102 | +// CUDA-NEXT: %7 = and i32 %flags, 8 |
| 103 | +// CUDA-NEXT: %extern = lshr i32 %7, 3 |
| 104 | +// CUDA-NEXT: %8 = and i32 %flags, 16 |
| 105 | +// CUDA-NEXT: %constant = lshr i32 %8, 4 |
| 106 | +// CUDA-NEXT: %9 = and i32 %flags, 32 |
| 107 | +// CUDA-NEXT: %normalized = lshr i32 %9, 5 |
| 108 | +// CUDA-NEXT: %10 = icmp eq i64 %size, 0 |
| 109 | +// CUDA-NEXT: br i1 %10, label %if.then, label %if.else |
109 | 110 |
|
110 | 111 | // CUDA: if.then: |
111 | | -// CUDA-NEXT: %10 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null) |
| 112 | +// CUDA-NEXT: %11 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null) |
112 | 113 | // CUDA-NEXT: br label %if.end |
113 | 114 |
|
114 | 115 | // CUDA: if.else: |
|
133 | 134 | // CUDA-NEXT: br label %if.end |
134 | 135 |
|
135 | 136 | // CUDA: if.end: |
136 | | -// CUDA-NEXT: %11 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 |
137 | | -// CUDA-NEXT: %12 = icmp eq ptr %11, @__stop_cuda_offloading_entries |
138 | | -// CUDA-NEXT: br i1 %12, label %while.end, label %while.entry |
| 137 | +// CUDA-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 |
| 138 | +// CUDA-NEXT: %13 = icmp eq ptr %12, @__stop_cuda_offloading_entries |
| 139 | +// CUDA-NEXT: br i1 %13, label %while.end, label %while.entry |
139 | 140 |
|
140 | 141 | // CUDA: while.end: |
141 | 142 | // CUDA-NEXT: ret void |
|
182 | 183 |
|
183 | 184 | // HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" { |
184 | 185 | // HIP-NEXT: entry: |
185 | | -// HIP-NEXT: br i1 icmp ne (ptr @__start_hip_offloading_entries, ptr @__stop_hip_offloading_entries), label %while.entry, label %while.end |
| 186 | +// HIP-NEXT: %1 = icmp ne ptr @__start_hip_offloading_entries, @__stop_hip_offloading_entries |
| 187 | +// HIP-NEXT: br i1 %1, label %while.entry, label %while.end |
186 | 188 |
|
187 | 189 | // HIP: while.entry: |
188 | | -// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %11, %if.end ] |
189 | | -// HIP-NEXT: %1 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0 |
190 | | -// HIP-NEXT: %addr = load ptr, ptr %1, align 8 |
191 | | -// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1 |
192 | | -// HIP-NEXT: %name = load ptr, ptr %2, align 8 |
193 | | -// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2 |
194 | | -// HIP-NEXT: %size = load i64, ptr %3, align 4 |
195 | | -// HIP-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 3 |
196 | | -// HIP-NEXT: %flags = load i32, ptr %4, align 4 |
197 | | -// HIP-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 4 |
198 | | -// HIP-NEXT: %textype = load i32, ptr %5, align 4 |
| 190 | +// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %12, %if.end ] |
| 191 | +// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0 |
| 192 | +// HIP-NEXT: %addr = load ptr, ptr %2, align 8 |
| 193 | +// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1 |
| 194 | +// HIP-NEXT: %name = load ptr, ptr %3, align 8 |
| 195 | +// HIP-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2 |
| 196 | +// HIP-NEXT: %size = load i64, ptr %4, align 4 |
| 197 | +// HIP-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 3 |
| 198 | +// HIP-NEXT: %flags = load i32, ptr %5, align 4 |
| 199 | +// HIP-NEXT: %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 4 |
| 200 | +// HIP-NEXT: %textype = load i32, ptr %6, align 4 |
199 | 201 | // HIP-NEXT: %type = and i32 %flags, 7 |
200 | | -// HIP-NEXT: %6 = and i32 %flags, 8 |
201 | | -// HIP-NEXT: %extern = lshr i32 %6, 3 |
202 | | -// HIP-NEXT: %7 = and i32 %flags, 16 |
203 | | -// HIP-NEXT: %constant = lshr i32 %7, 4 |
204 | | -// HIP-NEXT: %8 = and i32 %flags, 32 |
205 | | -// HIP-NEXT: %normalized = lshr i32 %8, 5 |
206 | | -// HIP-NEXT: %9 = icmp eq i64 %size, 0 |
207 | | -// HIP-NEXT: br i1 %9, label %if.then, label %if.else |
| 202 | +// HIP-NEXT: %7 = and i32 %flags, 8 |
| 203 | +// HIP-NEXT: %extern = lshr i32 %7, 3 |
| 204 | +// HIP-NEXT: %8 = and i32 %flags, 16 |
| 205 | +// HIP-NEXT: %constant = lshr i32 %8, 4 |
| 206 | +// HIP-NEXT: %9 = and i32 %flags, 32 |
| 207 | +// HIP-NEXT: %normalized = lshr i32 %9, 5 |
| 208 | +// HIP-NEXT: %10 = icmp eq i64 %size, 0 |
| 209 | +// HIP-NEXT: br i1 %10, label %if.then, label %if.else |
208 | 210 |
|
209 | 211 | // HIP: if.then: |
210 | | -// HIP-NEXT: %10 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null) |
| 212 | +// HIP-NEXT: %11 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null) |
211 | 213 | // HIP-NEXT: br label %if.end |
212 | 214 |
|
213 | 215 | // HIP: if.else: |
|
234 | 236 | // HIP-NEXT: br label %if.end |
235 | 237 |
|
236 | 238 | // HIP: if.end: |
237 | | -// HIP-NEXT: %11 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 |
238 | | -// HIP-NEXT: %12 = icmp eq ptr %11, @__stop_hip_offloading_entries |
239 | | -// HIP-NEXT: br i1 %12, label %while.end, label %while.entry |
| 239 | +// HIP-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1 |
| 240 | +// HIP-NEXT: %13 = icmp eq ptr %12, @__stop_hip_offloading_entries |
| 241 | +// HIP-NEXT: br i1 %13, label %while.end, label %while.entry |
240 | 242 |
|
241 | 243 | // HIP: while.end: |
242 | 244 | // HIP-NEXT: ret void |
|
0 commit comments