@@ -88,8 +88,7 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out,
88
88
; COMMON-NEXT: [[ENTRY:.*:]]
89
89
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
90
90
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
91
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
92
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
91
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
93
92
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
94
93
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
95
94
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -115,8 +114,7 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound
115
114
; COMMON-NEXT: [[ENTRY:.*:]]
116
115
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
117
116
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
118
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
119
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
117
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
120
118
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
121
119
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
122
120
; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]])
@@ -134,8 +132,7 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n
134
132
; COMMON-NEXT: [[ENTRY:.*:]]
135
133
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
136
134
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
137
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
138
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
135
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
139
136
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
140
137
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
141
138
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -155,8 +152,7 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt
155
152
; COMMON-NEXT: [[ENTRY:.*:]]
156
153
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
157
154
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
158
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
159
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
155
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
160
156
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
161
157
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
162
158
; COMMON-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8
@@ -174,8 +170,7 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out
174
170
; COMMON-NEXT: [[ENTRY:.*:]]
175
171
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
176
172
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
177
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
178
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
173
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
179
174
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
180
175
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
181
176
; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S3]], i64 4
@@ -195,8 +190,7 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr
195
190
; COMMON-NEXT: [[ENTRY:.*:]]
196
191
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
197
192
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
198
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
199
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
193
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
200
194
; COMMON-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
201
195
; COMMON-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
202
196
; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S3]] to i64
@@ -232,8 +226,7 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n
232
226
; COMMON-NEXT: [[ENTRY:.*:]]
233
227
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
234
228
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
235
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
236
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
229
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
237
230
; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
238
231
; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
239
232
; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true)
@@ -251,8 +244,7 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc
251
244
; COMMON-NEXT: [[BB:.*:]]
252
245
; COMMON-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4
253
246
; COMMON-NEXT: [[S4:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101)
254
- ; COMMON-NEXT: [[S5:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[S4]], align 4
255
- ; COMMON-NEXT: store [[STRUCT_S]] [[S5]], ptr [[S3]], align 4
247
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S3]], ptr addrspace(101) align 4 [[S4]], i64 8, i1 false)
256
248
; COMMON-NEXT: [[IN1:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
257
249
; COMMON-NEXT: [[IN2:%.*]] = addrspacecast ptr addrspace(1) [[IN1]] to ptr
258
250
; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN2]], align 4
@@ -273,12 +265,10 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2,
273
265
; SM_60-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
274
266
; SM_60-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
275
267
; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
276
- ; SM_60-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
277
- ; SM_60-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
268
+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
278
269
; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
279
270
; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
280
- ; SM_60-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
281
- ; SM_60-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
271
+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
282
272
; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
283
273
; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
284
274
; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT8]], align 4
@@ -313,12 +303,10 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i
313
303
; COMMON-NEXT: [[OUT8:%.*]] = addrspacecast ptr addrspace(1) [[OUT7]] to ptr
314
304
; COMMON-NEXT: [[INPUT24:%.*]] = alloca i32, align 4
315
305
; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
316
- ; COMMON-NEXT: [[INPUT26:%.*]] = load i32, ptr addrspace(101) [[INPUT25]], align 4
317
- ; COMMON-NEXT: store i32 [[INPUT26]], ptr [[INPUT24]], align 4
306
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false)
318
307
; COMMON-NEXT: [[INPUT11:%.*]] = alloca i32, align 4
319
308
; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
320
- ; COMMON-NEXT: [[INPUT13:%.*]] = load i32, ptr addrspace(101) [[INPUT12]], align 4
321
- ; COMMON-NEXT: store i32 [[INPUT13]], ptr [[INPUT11]], align 4
309
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false)
322
310
; COMMON-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]]
323
311
; COMMON-NEXT: store i32 1, ptr [[PTRNEW]], align 4
324
312
; COMMON-NEXT: ret void
@@ -337,12 +325,10 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S)
337
325
; SM_60-NEXT: [[INOUT8:%.*]] = addrspacecast ptr addrspace(1) [[INOUT7]] to ptr
338
326
; SM_60-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
339
327
; SM_60-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
340
- ; SM_60-NEXT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8
341
- ; SM_60-NEXT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4
328
+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
342
329
; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
343
330
; SM_60-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
344
- ; SM_60-NEXT: [[INPUT13:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT12]], align 4
345
- ; SM_60-NEXT: store [[STRUCT_S]] [[INPUT13]], ptr [[INPUT11]], align 4
331
+ ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
346
332
; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
347
333
; SM_60: [[FIRST]]:
348
334
; SM_60-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
@@ -402,12 +388,10 @@ define void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%str
402
388
; COMMON-NEXT: [[BB:.*:]]
403
389
; COMMON-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8
404
390
; COMMON-NEXT: [[INPUT25:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
405
- ; COMMON-NEXT: [[INPUT26:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT25]], align 8
406
- ; COMMON-NEXT: store [[STRUCT_S]] [[INPUT26]], ptr [[INPUT24]], align 4
391
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 8 [[INPUT24]], ptr addrspace(101) align 8 [[INPUT25]], i64 8, i1 false)
407
392
; COMMON-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4
408
393
; COMMON-NEXT: [[INPUT12:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
409
- ; COMMON-NEXT: [[INPUT13:%.*]] = load [[STRUCT_S]], ptr addrspace(101) [[INPUT12]], align 4
410
- ; COMMON-NEXT: store [[STRUCT_S]] [[INPUT13]], ptr [[INPUT11]], align 4
394
+ ; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false)
411
395
; COMMON-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]]
412
396
; COMMON: [[FIRST]]:
413
397
; COMMON-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0
0 commit comments