; LLVM IR regression test (FileCheck). The CHECK below verifies that a pass
; replaces the zero-initialized ESIMD simd<int, 16> class global @0 with a
; plain <16 x i32> global, preserving alignment and attribute group #0.
; NOTE(review): reconstructed from a commit-diff rendering; the post-change
; ("INTEL"-cased) lines are used throughout.

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

; cl::sycl::INTEL::gpu::simd<int, 16> — a single <16 x i32> payload member.
%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" = type { <16 x i32> }

$"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" = comdat any

; CHECK: [[NEWGLOBAL:[@a-zA-Z0-9_]*]] = dso_local global <16 x i32> zeroinitializer, align 64 #0
@0 = dso_local global %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE.cl::sycl::INTEL::gpu::simd" zeroinitializer, align 64 #0
; Function Attrs: norecurse
17
17
define weak_odr dso_local spir_kernel void @"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE4Test" (i32 addrspace (1 )* %_arg_ ) local_unnamed_addr #1 comdat !kernel_arg_addr_space !8 !kernel_arg_access_qual !9 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !11 !sycl_explicit_simd !12 !intel_reqd_sub_group_size !8 {
18
18
entry:
19
- %vc.i = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , align 64
20
- %agg.tmp.i = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , align 64
19
+ %vc.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , align 64
20
+ %agg.tmp.i = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , align 64
21
21
%call.esimd.i.i.i.i.i = call <3 x i32 > @llvm.genx.local.id.v3i32 () #5
22
22
%local_id.y.i.i.i.i.i = extractelement <3 x i32 > %call.esimd.i.i.i.i.i , i32 1
23
23
%local_id.y.cast.ty.i.i.i.i.i = zext i32 %local_id.y.i.i.i.i.i to i64
@@ -36,15 +36,15 @@ entry:
36
36
%group.id.x.cast.ty.i.i.i.i.i = zext i32 %group.id.x.i.i.i.i.i to i64
37
37
%mul.i4.i.i.i.i = mul nuw i64 %group.id.x.cast.ty.i.i.i.i.i , %wgsize.x.cast.ty.i.i.i.i.i
38
38
%add.i5.i.i.i.i = add i64 %mul.i4.i.i.i.i , %local_id.x.cast.ty.i.i.i.i.i
39
- %0 = bitcast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %agg.tmp.i to i8*
39
+ %0 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %agg.tmp.i to i8*
40
40
call void @llvm.lifetime.start.p0i8 (i64 64 , i8* nonnull %0 )
41
- %1 = bitcast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %vc.i to i8*
41
+ %1 = bitcast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %vc.i to i8*
42
42
call void @llvm.lifetime.start.p0i8 (i64 64 , i8* nonnull %1 ) #5
43
43
%conv.i = trunc i64 %add.i5.i.i.i.i to i32
44
- %2 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %vc.i to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
44
+ %2 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %vc.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
45
45
%splat.splatinsert.i.i = insertelement <16 x i32 > undef , i32 %conv.i , i32 0
46
46
%splat.splat.i.i = shufflevector <16 x i32 > %splat.splatinsert.i.i , <16 x i32 > undef , <16 x i32 > zeroinitializer
47
- %M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %2 , i64 0 , i32 0
47
+ %M_data.i13.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %2 , i64 0 , i32 0
48
48
store <16 x i32 > %splat.splat.i.i , <16 x i32 > addrspace (4 )* %M_data.i13.i , align 64 , !tbaa !13
49
49
%conv3.i = trunc i64 %add.i.i.i.i.i to i32
50
50
%splat.splatinsert.i20.i = insertelement <8 x i32 > undef , i32 %conv3.i , i32 0
@@ -56,17 +56,17 @@ entry:
56
56
%..i = select i1 %cmp.i , i64 %add.i5.i.i.i.i , i64 %add.i.i.i.i.i
57
57
%conv9.i = trunc i64 %..i to i32
58
58
; CHECK: store <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> addrspace(4)* addrspacecast (<16 x i32>* getelementptr inbounds ({{.+}}, {{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*), i64 0, i32 0) to <16 x i32> addrspace(4)*), align 64, !tbaa.struct !16
59
- store <16 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
59
+ store <16 x i32 > <i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 , i32 1 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
60
60
%mul.i = shl nsw i32 %conv9.i , 4
61
61
%idx.ext.i = sext i32 %mul.i to i64
62
62
%add.ptr.i16 = getelementptr inbounds i32 , i32 addrspace (1 )* %_arg_ , i64 %idx.ext.i
63
63
%add.ptr.i = addrspacecast i32 addrspace (1 )* %add.ptr.i16 to i32 addrspace (4 )*
64
- %3 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %agg.tmp.i to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
64
+ %3 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %agg.tmp.i to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
65
65
%call.esimd.i.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i13.i ) #5
66
- %M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %3 , i64 0 , i32 0
66
+ %M_data.i2.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %3 , i64 0 , i32 0
67
67
call void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %call.esimd.i.i.i , <16 x i32 > addrspace (4 )* %M_data.i2.i.i ) #5
68
- call spir_func void @_Z3fooPiN2cl4sycl5intel3gpu4simdIiLi16EEE (i32 addrspace (4 )* %add.ptr.i , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * nonnull %agg.tmp.i ) #5
69
- store <16 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
68
+ call spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (i32 addrspace (4 )* %add.ptr.i , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * nonnull %agg.tmp.i ) #5
69
+ store <16 x i32 > <i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 , i32 2 >, <16 x i32 > addrspace (4 )* addrspacecast (<16 x i32 >* getelementptr inbounds (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * @0 , i64 0 , i32 0 ) to <16 x i32 > addrspace (4 )*), align 64 , !tbaa.struct !16
70
70
call void @llvm.lifetime.end.p0i8 (i64 64 , i8* nonnull %1 ) #5
71
71
call void @llvm.lifetime.end.p0i8 (i64 64 , i8* nonnull %0 )
72
72
ret void
; Stack-object lifetime intrinsics used by the kernel's allocas.
; (lifetime.start declare recovered from the diff hunk-header context line.)
declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #2

declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #2
; Function Attrs: noinline norecurse nounwind
82
- define dso_local spir_func void @_Z3fooPiN2cl4sycl5intel3gpu4simdIiLi16EEE (i32 addrspace (4 )* %C , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %v ) local_unnamed_addr #3 !sycl_explicit_simd !12 {
82
+ define dso_local spir_func void @_Z3fooPiN2cl4sycl5INTEL3gpu4simdIiLi16EEE (i32 addrspace (4 )* %C , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %v ) local_unnamed_addr #3 !sycl_explicit_simd !12 {
83
83
entry:
84
- %agg.tmp = alloca %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , align 64
85
- %0 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %v to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
86
- %1 = addrspacecast %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * %agg.tmp to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*
87
- %M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %0 , i64 0 , i32 0
84
+ %agg.tmp = alloca %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , align 64
85
+ %0 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %v to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
86
+ %1 = addrspacecast %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * %agg.tmp to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*
87
+ %M_data.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %0 , i64 0 , i32 0
88
88
%call.esimd.i.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i.i ), !noalias !17
89
89
; CHECK: {{.+}} = call <16 x i32> @llvm.genx.vload.v16i32.p4v16i32(<16 x i32> addrspace(4)* getelementptr ({{.+}}, {{.+}} addrspace(4)* addrspacecast ({{.+}}* bitcast (<16 x i32>* [[NEWGLOBAL]] to {{.+}}*) to {{.+}} addrspace(4)*), i64 0, i32 0)), !noalias !17
90
- %call.esimd.i8.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* getelementptr (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* addrspacecast (%"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" * @0 to %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )*), i64 0 , i32 0 )), !noalias !17
90
+ %call.esimd.i8.i = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* getelementptr (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* addrspacecast (%"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" * @0 to %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )*), i64 0 , i32 0 )), !noalias !17
91
91
%add.i = add <16 x i32 > %call.esimd.i8.i , %call.esimd.i.i
92
- %M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" , %"class._ZTSN2cl4sycl5intel3gpu4simdIiLi16EEE .cl::sycl::intel ::gpu::simd" addrspace (4 )* %1 , i64 0 , i32 0
92
+ %M_data.i.i.i = getelementptr inbounds %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" , %"class._ZTSN2cl4sycl5INTEL3gpu4simdIiLi16EEE .cl::sycl::INTEL ::gpu::simd" addrspace (4 )* %1 , i64 0 , i32 0
93
93
call void @llvm.genx.vstore.v16i32.p4v16i32 (<16 x i32 > %add.i , <16 x i32 > addrspace (4 )* %M_data.i.i.i )
94
94
%2 = ptrtoint i32 addrspace (4 )* %C to i64
95
95
%call.esimd.i.i2 = call <16 x i32 > @llvm.genx.vload.v16i32.p4v16i32 (<16 x i32 > addrspace (4 )* %M_data.i.i.i )
; Trailing attribute group and metadata. !17/!18/!19 form the noalias scope
; for simd::operator+'s %agg.result (INTEL-cased mangling after the rename);
; !15/!16 are the TBAA root and the tbaa.struct descriptor (64-byte vector).
; (attributes #5 recovered from the diff hunk-header context line.)
attributes #5 = { nounwind }

!15 = !{!"Simple C++ TBAA"}
!16 = !{i64 0, i64 64, !13}
!17 = !{!18}
!18 = distinct !{!18, !19, !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_: %agg.result"}
!19 = distinct !{!19, !"_ZNK2cl4sycl5INTEL3gpu4simdIiLi16EEplERKS4_"}
!20 = !{i32 8275}
!21 = !{i32 8268}
!22 = !{i32 8269}