88
88
#endif
89
89
#define _KERNELFUNCPARAM (a ) _KERNELFUNCPARAMTYPE a
90
90
91
+ #if defined(__SYCL_UNNAMED_LAMBDA__)
92
+ // We can't use nested types (e.g. struct S defined inside main() routine) to
93
+ // name kernels. At the same time, we have to provide a unique kernel name for
94
+ // sycl::fill and the only thing we can use to introduce that uniqueness (in
95
+ // general) is the template parameter T which might be exactly that nested type.
96
+ // That means we cannot support sycl::fill(void *, T&, size_t) for such types in
97
+ // general. However, we can do better than that when unnamed lambdas are
98
+ // enabled, so do it here! See also https://github.com/intel/llvm/issues/469.
91
99
template <typename DataT, int Dimensions, sycl::access::mode AccessMode,
92
100
sycl::access::target AccessTarget,
93
101
sycl::access::placeholder IsPlaceholder>
94
- class __fill ;
102
+ using __fill = sycl::detail::auto_name;
103
+ template <typename T> using __usmfill = sycl::detail::auto_name;
104
+ template <typename T> using __usmfill2d = sycl::detail::auto_name;
105
+ template <typename T> using __usmmemcpy2d = sycl::detail::auto_name;
106
+
107
+ template <typename T_Src, typename T_Dst, int Dims,
108
+ sycl::access::mode AccessMode, sycl::access::target AccessTarget,
109
+ sycl::access::placeholder IsPlaceholder>
110
+ using __copyAcc2Ptr = sycl::detail::auto_name;
95
111
112
+ template <typename T_Src, typename T_Dst, int Dims,
113
+ sycl::access::mode AccessMode, sycl::access::target AccessTarget,
114
+ sycl::access::placeholder IsPlaceholder>
115
+ using __copyPtr2Acc = sycl::detail::auto_name;
116
+
117
+ template <typename T_Src, int Dims_Src, sycl::access::mode AccessMode_Src,
118
+ sycl::access::target AccessTarget_Src, typename T_Dst, int Dims_Dst,
119
+ sycl::access::mode AccessMode_Dst,
120
+ sycl::access::target AccessTarget_Dst,
121
+ sycl::access::placeholder IsPlaceholder_Src,
122
+ sycl::access::placeholder IsPlaceholder_Dst>
123
+ using __copyAcc2Acc = sycl::detail::auto_name;
124
+ #else
125
+ // Limited fallback path for when unnamed lambdas aren't available. Cannot
126
+ // handle nested types.
127
+ template <typename DataT, int Dimensions, sycl::access::mode AccessMode,
128
+ sycl::access::target AccessTarget,
129
+ sycl::access::placeholder IsPlaceholder>
130
+ class __fill ;
96
131
template <typename T> class __usmfill ;
97
132
template <typename T> class __usmfill2d ;
98
133
template <typename T> class __usmmemcpy2d ;
@@ -114,6 +149,7 @@ template <typename T_Src, int Dims_Src, sycl::access::mode AccessMode_Src,
114
149
sycl::access::placeholder IsPlaceholder_Src,
115
150
sycl::access::placeholder IsPlaceholder_Dst>
116
151
class __copyAcc2Acc ;
152
+ #endif
117
153
118
154
// For unit testing purposes
119
155
class MockHandler ;
@@ -860,9 +896,8 @@ class __SYCL_EXPORT handler {
860
896
return false ;
861
897
862
898
range<1 > LinearizedRange (Src.size ());
863
- parallel_for<
864
- class __copyAcc2Acc <TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
865
- ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
899
+ parallel_for<__copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
900
+ ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
866
901
LinearizedRange, [=](id<1 > Id) {
867
902
size_t Index = Id[0 ];
868
903
id<DimSrc> SrcId = detail::getDelinearizedId (Src.get_range (), Index);
@@ -889,9 +924,8 @@ class __SYCL_EXPORT handler {
889
924
if (!MIsHost)
890
925
return false ;
891
926
892
- single_task<
893
- class __copyAcc2Acc <TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
894
- ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
927
+ single_task<__copyAcc2Acc<TSrc, DimSrc, ModeSrc, TargetSrc, TDst, DimDst,
928
+ ModeDst, TargetDst, IsPHSrc, IsPHDst>>(
895
929
[=]() { *(Dst.get_pointer ()) = *(Src.get_pointer ()); });
896
930
return true ;
897
931
}
@@ -908,8 +942,7 @@ class __SYCL_EXPORT handler {
908
942
copyAccToPtrHost (accessor<TSrc, Dim, AccMode, AccTarget, IsPH> Src,
909
943
TDst *Dst) {
910
944
range<Dim> Range = Src.get_range ();
911
- parallel_for<
912
- class __copyAcc2Ptr <TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
945
+ parallel_for<__copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
913
946
Range, [=](id<Dim> Index) {
914
947
const size_t LinearIndex = detail::getLinearIndex (Index, Range);
915
948
using TSrcNonConst = typename std::remove_const_t <TSrc>;
@@ -927,7 +960,7 @@ class __SYCL_EXPORT handler {
927
960
std::enable_if_t <Dim == 0 >
928
961
copyAccToPtrHost (accessor<TSrc, Dim, AccMode, AccTarget, IsPH> Src,
929
962
TDst *Dst) {
930
- single_task<class __copyAcc2Ptr <TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
963
+ single_task<__copyAcc2Ptr<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
931
964
[=]() {
932
965
using TSrcNonConst = typename std::remove_const_t <TSrc>;
933
966
*(reinterpret_cast <TSrcNonConst *>(Dst)) = *(Src.get_pointer ());
@@ -944,8 +977,7 @@ class __SYCL_EXPORT handler {
944
977
copyPtrToAccHost (TSrc *Src,
945
978
accessor<TDst, Dim, AccMode, AccTarget, IsPH> Dst) {
946
979
range<Dim> Range = Dst.get_range ();
947
- parallel_for<
948
- class __copyPtr2Acc <TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
980
+ parallel_for<__copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
949
981
Range, [=](id<Dim> Index) {
950
982
const size_t LinearIndex = detail::getLinearIndex (Index, Range);
951
983
Dst[Index] = (reinterpret_cast <const TDst *>(Src))[LinearIndex];
@@ -962,7 +994,7 @@ class __SYCL_EXPORT handler {
962
994
std::enable_if_t <Dim == 0 >
963
995
copyPtrToAccHost (TSrc *Src,
964
996
accessor<TDst, Dim, AccMode, AccTarget, IsPH> Dst) {
965
- single_task<class __copyPtr2Acc <TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
997
+ single_task<__copyPtr2Acc<TSrc, TDst, Dim, AccMode, AccTarget, IsPH>>(
966
998
[=]() {
967
999
*(Dst.get_pointer ()) = *(reinterpret_cast <const TDst *>(Src));
968
1000
});
@@ -2551,13 +2583,11 @@ class __SYCL_EXPORT handler {
2551
2583
*PatternPtr = Pattern;
2552
2584
} else if constexpr (Dims == 0 ) {
2553
2585
// Special case for zero-dim accessors.
2554
- parallel_for<
2555
- class __fill <T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2586
+ parallel_for<__fill<T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2556
2587
range<1 >(1 ), [=](id<1 >) { Dst = Pattern; });
2557
2588
} else {
2558
2589
range<Dims> Range = Dst.get_range ();
2559
- parallel_for<
2560
- class __fill <T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2590
+ parallel_for<__fill<T, Dims, AccessMode, AccessTarget, IsPlaceholder>>(
2561
2591
Range, [=](id<Dims> Index) { Dst[Index] = Pattern; });
2562
2592
}
2563
2593
}
@@ -2572,7 +2602,7 @@ class __SYCL_EXPORT handler {
2572
2602
throwIfActionIsCreated ();
2573
2603
static_assert (is_device_copyable<T>::value,
2574
2604
" Pattern must be device copyable" );
2575
- parallel_for<class __usmfill <T>>(range<1 >(Count), [=](id<1 > Index) {
2605
+ parallel_for<__usmfill<T>>(range<1 >(Count), [=](id<1 > Index) {
2576
2606
T *CastedPtr = static_cast <T *>(Ptr);
2577
2607
CastedPtr[Index] = Pattern;
2578
2608
});
@@ -3277,7 +3307,7 @@ class __SYCL_EXPORT handler {
3277
3307
// Limit number of work items to be resistant to big copies.
3278
3308
id<2 > Chunk = computeFallbackKernelBounds (Height, Width);
3279
3309
id<2 > Iterations = (Chunk + id<2 >{Height, Width} - 1 ) / Chunk;
3280
- parallel_for<class __usmmemcpy2d <T>>(
3310
+ parallel_for<__usmmemcpy2d<T>>(
3281
3311
range<2 >{Chunk[0 ], Chunk[1 ]}, [=](id<2 > Index) {
3282
3312
T *CastedDest = static_cast <T *>(Dest);
3283
3313
const T *CastedSrc = static_cast <const T *>(Src);
@@ -3323,7 +3353,7 @@ class __SYCL_EXPORT handler {
3323
3353
// Limit number of work items to be resistant to big fill operations.
3324
3354
id<2 > Chunk = computeFallbackKernelBounds (Height, Width);
3325
3355
id<2 > Iterations = (Chunk + id<2 >{Height, Width} - 1 ) / Chunk;
3326
- parallel_for<class __usmfill2d <T>>(
3356
+ parallel_for<__usmfill2d<T>>(
3327
3357
range<2 >{Chunk[0 ], Chunk[1 ]}, [=](id<2 > Index) {
3328
3358
T *CastedDest = static_cast <T *>(Dest);
3329
3359
for (uint32_t I = 0 ; I < Iterations[0 ]; ++I) {
0 commit comments