From 4ade2b3e2c2ac8e046c82dc364525bbfdcba4d4e Mon Sep 17 00:00:00 2001 From: anikelal Date: Wed, 7 May 2025 16:40:18 +0530 Subject: [PATCH] Adding test for optnone and alwaysinline attributes assinged to opencl kernels stubs --- .../test/CodeGenOpenCL/opencl-kernel-call.cl | 271 +++++++++++++----- 1 file changed, 194 insertions(+), 77 deletions(-) diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl index a5b2bee127bd0..cd0839f9fcb33 100644 --- a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl +++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl @@ -70,6 +70,14 @@ __attribute__((noinline)) kernel void callee_kern(global int *A){ *A = 1; } +__attribute__((optnone)) kernel void callee_kern_with_optnone_attribute(global int *A){ + *A = 1; +} + +__attribute__((always_inline)) kernel void callee_kern_with_alwaysinline_attribute(global int *A){ + *A = 1; +} + kernel void callee_kern_Mat3X3(global Mat3X3 *in, global Mat4X4 *out) { out[0] = foo(in[1]); } @@ -111,7 +119,8 @@ kernel void ext_KernelLargeTwoMember(struct LargeStructTwoMember u); kernel void caller_kern(global int* A, global Mat3X3 *mat3X3, global Mat4X4 *mat4X4, global Mat32X32 *mat32X32, global Mat64X64 *mat64X64){ callee_kern(A); ext_callee_kern(A); - + callee_kern_with_optnone_attribute(A); + callee_kern_with_alwaysinline_attribute(A); callee_kern_Mat3X3(mat3X3, mat4X4); callee_kern_Mat32X32(mat32X32, mat64X64); ext_callee_kern_Mat3X3(mat3X3, mat4X4); @@ -214,7 +223,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4:[0-9]+]] +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6:[0-9]+]] // X86-NEXT: ret void // // @@ -230,6 +239,53 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // // // X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define spir_kernel void @callee_kern_with_optnone_attribute( +// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr noundef align 4 [[TMP0]]) #[[ATTR6]] +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone +// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute( +// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: store i32 1, ptr [[TMP0]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: alwaysinline convergent norecurse nounwind +// X86-LABEL: define spir_kernel void @callee_kern_with_alwaysinline_attribute( +// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR_I:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: store ptr [[TMP0]], ptr [[A_ADDR_I]], align 4 +// X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4 +// X86-NEXT: store i32 1, ptr [[TMP1]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: alwaysinline convergent norecurse nounwind +// X86-LABEL: define void @__clang_ocl_kern_imp_callee_kern_with_alwaysinline_attribute( +// X86-SAME: ptr noundef align 4 [[A:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// X86-NEXT: entry: +// X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: store i32 1, ptr [[TMP0]], align 4 +// X86-NEXT: ret void +// +// +// X86: Function Attrs: convergent noinline norecurse nounwind optnone // X86-LABEL: define spir_kernel void @callee_kern_Mat3X3( // X86-SAME: ptr noundef align 4 [[IN:%.*]], ptr noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { // X86-NEXT: entry: @@ -239,7 +295,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -256,7 +312,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr [[TMP0]], i32 0 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr [[TMP1]], i32 1 -// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]] +// X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]] // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false) // X86-NEXT: ret void // @@ -271,7 +327,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: store ptr [[OUT]], ptr [[OUT_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUT_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -288,7 +344,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr [[TMP0]], i32 0 // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IN_ADDR]], align 4 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32:%.*]], ptr [[TMP1]], i32 1 -// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR4]] +// X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[ARRAYIDX1]]) #[[ATTR6]] // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false) // X86-NEXT: ret void // @@ -297,7 +353,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-LABEL: define spir_kernel void @KernelOneMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -307,7 +363,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false) -// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -315,7 +371,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-LABEL: define spir_kernel void @KernelLargeOneMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -325,7 +381,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false) -// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -333,7 +389,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-LABEL: define spir_kernel void @KernelTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -343,7 +399,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false) -// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -351,7 +407,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-LABEL: define spir_kernel void @KernelLargeTwoMember( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] { // X86-NEXT: entry: -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -361,7 +417,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: entry: // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false) -// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR4]] +// X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -384,7 +440,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 // X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 // X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr noundef align 4 [[TMP0]], ptr noundef align 4 [[TMP1]], ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]], ptr noundef align 4 [[TMP4]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -392,6 +448,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-LABEL: define void @__clang_ocl_kern_imp_caller_kern( // X86-SAME: ptr noundef align 4 [[A:%.*]], ptr noundef align 4 [[MAT3X3:%.*]], ptr noundef align 4 [[MAT4X4:%.*]], ptr noundef align 4 [[MAT32X32:%.*]], ptr noundef align 4 [[MAT64X64:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] { // X86-NEXT: entry: +// X86-NEXT: [[A_ADDR_I:%.*]] = alloca ptr, align 4 // X86-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr, align 4 @@ -403,21 +460,27 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: store ptr [[MAT32X32]], ptr [[MAT32X32_ADDR]], align 4 // X86-NEXT: store ptr [[MAT64X64]], ptr [[MAT64X64_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr noundef align 4 [[TMP0]]) #[[ATTR6]] // X86-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR4]] -// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 -// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP2]], ptr noundef align 4 [[TMP3]]) #[[ATTR4]] -// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 -// X86-NEXT: [[TMP5:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP4]], ptr noundef align 4 [[TMP5]]) #[[ATTR4]] -// X86-NEXT: [[TMP6:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 -// X86-NEXT: [[TMP7:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP6]], ptr noundef align 4 [[TMP7]]) #[[ATTR4]] -// X86-NEXT: [[TMP8:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 -// X86-NEXT: [[TMP9:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP8]], ptr noundef align 4 [[TMP9]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr noundef align 4 [[TMP1]]) #[[ATTR6]] +// X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr noundef align 4 [[TMP2]]) #[[ATTR6]] +// X86-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// X86-NEXT: store ptr [[TMP3]], ptr [[A_ADDR_I]], align 4 +// X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_I]], align 4 +// X86-NEXT: store i32 1, ptr [[TMP4]], align 4 +// X86-NEXT: [[TMP5:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 +// X86-NEXT: [[TMP6:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr noundef align 4 [[TMP5]], ptr noundef align 4 [[TMP6]]) #[[ATTR6]] +// X86-NEXT: [[TMP7:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 +// X86-NEXT: [[TMP8:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr noundef align 4 [[TMP7]], ptr noundef align 4 [[TMP8]]) #[[ATTR6]] +// X86-NEXT: [[TMP9:%.*]] = load ptr, ptr [[MAT3X3_ADDR]], align 4 +// X86-NEXT: [[TMP10:%.*]] = load ptr, ptr [[MAT4X4_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr noundef align 4 [[TMP9]], ptr noundef align 4 [[TMP10]]) #[[ATTR6]] +// X86-NEXT: [[TMP11:%.*]] = load ptr, ptr [[MAT32X32_ADDR]], align 4 +// X86-NEXT: [[TMP12:%.*]] = load ptr, ptr [[MAT64X64_ADDR]], align 4 +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr noundef align 4 [[TMP11]], ptr noundef align 4 [[TMP12]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -428,7 +491,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr, align 4 // X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]], ptr noundef align 8 [[TMP0]], ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -442,10 +505,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTONEMEM]], ptr align 4 [[TMP0]], i32 8, i1 false) // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[STRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 16, i1 false) // X86-NEXT: store ptr [[GLOBAL_STRUCTONEMEM]], ptr [[GLOBAL_STRUCTONEMEM_ADDR]], align 4 -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]] -// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR4]] -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]] -// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[STRUCTONEMEM]]) #[[ATTR6]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[STRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -453,7 +516,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-LABEL: define spir_kernel void @caller_kern3( // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[LARGESTRUCTONEMEM:%.*]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[LARGESTRUCTTWOMEM:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] { // X86-NEXT: entry: -// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]], ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -465,10 +528,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // X86-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTONEMEM]], ptr align 4 [[TMP0]], i32 800, i1 false) // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[LARGESTRUCTTWOMEM]], ptr align 4 [[TMP1]], i32 480, i1 false) -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR4]] -// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]] -// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR4]] -// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR4]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR6]] +// X86-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR6]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[LARGESTRUCTONEMEM]]) #[[ATTR6]] +// X86-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[LARGESTRUCTTWOMEM]]) #[[ATTR6]] // X86-NEXT: ret void // // @@ -562,7 +625,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR5:[0-9]+]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR7:[0-9]+]] // AMDGCN-NEXT: ret void // // @@ -578,6 +641,53 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // // // AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_with_optnone_attribute( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR7]] +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: alwaysinline convergent norecurse nounwind +// AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_with_alwaysinline_attribute( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR4:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR_I:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[TMP0]], ptr addrspace(5) [[A_ADDR_I]], align 8 +// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR_I]], align 8 +// AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP1]], align 4 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: alwaysinline convergent norecurse nounwind +// AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_callee_kern_with_alwaysinline_attribute( +// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]]) #[[ATTR5:[0-9]+]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META6]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7]] { +// AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// AMDGCN-NEXT: store ptr addrspace(1) [[A]], ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP0]], align 4 +// AMDGCN-NEXT: ret void +// +// +// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone // AMDGCN-LABEL: define dso_local amdgpu_kernel void @callee_kern_Mat3X3( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META9:![0-9]+]] !kernel_arg_type [[META10:![0-9]+]] !kernel_arg_base_type [[META10]] !kernel_arg_type_qual [[META11:![0-9]+]] { // AMDGCN-NEXT: entry: @@ -587,7 +697,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -606,7 +716,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 -// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR5]] +// AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 // AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 @@ -624,7 +734,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -643,7 +753,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR7]] // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // @@ -657,7 +767,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP0]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -670,7 +780,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -680,7 +790,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -690,7 +800,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) -// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -704,7 +814,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8 // AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -721,7 +831,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP2]], align 8 // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 -// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP3]], <2 x i32> [[TMP5]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -731,7 +841,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -741,7 +851,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: entry: // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false) -// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -764,7 +874,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 // AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 // AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]], ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]], ptr addrspace(1) noundef align 4 [[TMP4]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern(ptr addrspace(1) noundef align 4 [[TMP0]], ptr addrspace(1) noundef align 4 [[TMP1]], ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]], ptr addrspace(1) noundef align 4 [[TMP4]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -772,6 +882,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-LABEL: define dso_local void @__clang_ocl_kern_imp_caller_kern( // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[A:%.*]], ptr addrspace(1) noundef align 4 [[MAT3X3:%.*]], ptr addrspace(1) noundef align 4 [[MAT4X4:%.*]], ptr addrspace(1) noundef align 4 [[MAT32X32:%.*]], ptr addrspace(1) noundef align 4 [[MAT64X64:%.*]]) #[[ATTR3]] !kernel_arg_addr_space [[META18]] !kernel_arg_access_qual [[META19]] !kernel_arg_type [[META20]] !kernel_arg_base_type [[META20]] !kernel_arg_type_qual [[META21]] { // AMDGCN-NEXT: entry: +// AMDGCN-NEXT: [[A_ADDR_I:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT3X3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) // AMDGCN-NEXT: [[MAT4X4_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) @@ -783,21 +894,27 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: store ptr addrspace(1) [[MAT32X32]], ptr addrspace(5) [[MAT32X32_ADDR]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[MAT64X64]], ptr addrspace(5) [[MAT64X64_ADDR]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern(ptr addrspace(1) noundef align 4 [[TMP0]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR5]] -// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP2]], ptr addrspace(1) noundef align 4 [[TMP3]]) #[[ATTR5]] -// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP4]], ptr addrspace(1) noundef align 4 [[TMP5]]) #[[ATTR5]] -// AMDGCN-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP6]], ptr addrspace(1) noundef align 4 [[TMP7]]) #[[ATTR5]] -// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 -// AMDGCN-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP8]], ptr addrspace(1) noundef align 4 [[TMP9]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern(ptr addrspace(1) noundef align 4 [[TMP1]]) #[[ATTR7]] +// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_with_optnone_attribute(ptr addrspace(1) noundef align 4 [[TMP2]]) #[[ATTR7]] +// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR]], align 8 +// AMDGCN-NEXT: store ptr addrspace(1) [[TMP3]], ptr addrspace(5) [[A_ADDR_I]], align 8 +// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[A_ADDR_I]], align 8 +// AMDGCN-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4 +// AMDGCN-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP5]], ptr addrspace(1) noundef align 4 [[TMP6]]) #[[ATTR7]] +// AMDGCN-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP7]], ptr addrspace(1) noundef align 4 [[TMP8]]) #[[ATTR7]] +// AMDGCN-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT3X3_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP10:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT4X4_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat3X3(ptr addrspace(1) noundef align 4 [[TMP9]], ptr addrspace(1) noundef align 4 [[TMP10]]) #[[ATTR7]] +// AMDGCN-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT32X32_ADDR]], align 8 +// AMDGCN-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[MAT64X64_ADDR]], align 8 +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_callee_kern_Mat32X32(ptr addrspace(1) noundef align 4 [[TMP11]], ptr addrspace(1) noundef align 4 [[TMP12]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -819,7 +936,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8 // AMDGCN-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP5]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP2]], ptr addrspace(1) noundef align 8 [[TMP1]], <2 x i32> [[TMP4]], <2 x i32> [[TMP6]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP2]], ptr addrspace(1) noundef align 8 [[TMP1]], <2 x i32> [[TMP4]], <2 x i32> [[TMP6]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -839,20 +956,20 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP2]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelOneMember(<2 x i32> [[TMP2]]) #[[ATTR7]] // AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE2]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(<2 x i32> [[TMP3]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelOneMember(<2 x i32> [[TMP3]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8 // AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR7]] // AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8 // AMDGCN-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1 // AMDGCN-NEXT: [[TMP11:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP10]], align 8 -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(<2 x i32> [[TMP9]], <2 x i32> [[TMP11]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelTwoMember(<2 x i32> [[TMP9]], <2 x i32> [[TMP11]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -864,7 +981,7 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(4) align 8 [[TMP1]], i64 480, i1 false) -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // // @@ -876,10 +993,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct // AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false) // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(5) align 8 [[TMP1]], i64 480, i1 false) -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR5]] -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]] -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR5]] -// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR7]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR7]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]]) #[[ATTR7]] +// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_ext_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR7]] // AMDGCN-NEXT: ret void // //.