Skip to content

Commit f7ffa52

Browse files
[SYCL][NFCI][ABI-Break] Move handler members to impl (#14460)
This moves some of the members in the handler class to its impl class. Doing so allows developers to change these arguments without breaking ABI. This also moves more implementation details, such as command-group classes, launch configuration information, argument information and HostTask tracking, into sources to avoid hard-to-find ABI breaks in the communication between headers and runtime library. In addition to this, the following improvements are made: * The HostKernel class has been simplified to no longer have call and runOnHost functions. * The HostTask wrapper class has been moved to sources and the owner has been changed from a unique_ptr to a shared_ptr, which prevents the need for including host_task_impl.hpp in odd places. --------- Signed-off-by: Larsen, Steffen <[email protected]>
1 parent 3254bca commit f7ffa52

File tree

85 files changed

+1102
-1360
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+1102
-1360
lines changed

sycl/include/sycl/detail/cg_types.hpp

Lines changed: 28 additions & 281 deletions
Original file line numberDiff line numberDiff line change
@@ -37,117 +37,34 @@ class handler;
3737
namespace detail {
3838
class HostTask;
3939

40-
// The structure represents kernel argument.
41-
class ArgDesc {
42-
public:
43-
ArgDesc(sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size,
44-
int Index)
45-
: MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {}
46-
47-
sycl::detail::kernel_param_kind_t MType;
48-
void *MPtr;
49-
int MSize;
50-
int MIndex;
51-
};
52-
53-
// The structure represents NDRange - global, local sizes, global offset,
54-
// number of dimensions, and the cluster dimensions if applicable.
55-
class NDRDescT {
56-
// The method initializes all sizes for dimensions greater than the passed one
57-
// to the default values, so they will not affect execution.
58-
void setNDRangeLeftover(int Dims_) {
59-
for (int I = Dims_; I < 3; ++I) {
60-
GlobalSize[I] = 1;
61-
LocalSize[I] = LocalSize[0] ? 1 : 0;
62-
GlobalOffset[I] = 0;
63-
NumWorkGroups[I] = 0;
64-
}
65-
}
66-
67-
public:
68-
NDRDescT()
69-
: GlobalSize{0, 0, 0}, LocalSize{0, 0, 0}, NumWorkGroups{0, 0, 0},
70-
Dims{0} {}
71-
72-
template <int Dims_> void set(sycl::range<Dims_> NumWorkItems) {
73-
for (int I = 0; I < Dims_; ++I) {
74-
GlobalSize[I] = NumWorkItems[I];
75-
LocalSize[I] = 0;
76-
GlobalOffset[I] = 0;
77-
NumWorkGroups[I] = 0;
78-
}
79-
setNDRangeLeftover(Dims_);
80-
Dims = Dims_;
81-
}
82-
83-
// Initializes this ND range descriptor with given range of work items and
84-
// offset.
85-
template <int Dims_>
86-
void set(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset) {
87-
for (int I = 0; I < Dims_; ++I) {
88-
GlobalSize[I] = NumWorkItems[I];
89-
LocalSize[I] = 0;
90-
GlobalOffset[I] = Offset[I];
91-
NumWorkGroups[I] = 0;
92-
}
93-
setNDRangeLeftover(Dims_);
94-
Dims = Dims_;
95-
}
96-
97-
template <int Dims_> void set(sycl::nd_range<Dims_> ExecutionRange) {
98-
for (int I = 0; I < Dims_; ++I) {
99-
GlobalSize[I] = ExecutionRange.get_global_range()[I];
100-
LocalSize[I] = ExecutionRange.get_local_range()[I];
101-
GlobalOffset[I] = ExecutionRange.get_offset()[I];
102-
NumWorkGroups[I] = 0;
103-
}
104-
setNDRangeLeftover(Dims_);
105-
Dims = Dims_;
106-
}
107-
108-
void set(int Dims_, sycl::nd_range<3> ExecutionRange) {
109-
for (int I = 0; I < Dims_; ++I) {
110-
GlobalSize[I] = ExecutionRange.get_global_range()[I];
111-
LocalSize[I] = ExecutionRange.get_local_range()[I];
112-
GlobalOffset[I] = ExecutionRange.get_offset()[I];
113-
NumWorkGroups[I] = 0;
114-
}
115-
setNDRangeLeftover(Dims_);
116-
Dims = Dims_;
117-
}
118-
119-
template <int Dims_> void setNumWorkGroups(sycl::range<Dims_> N) {
120-
for (int I = 0; I < Dims_; ++I) {
121-
GlobalSize[I] = 0;
122-
// '0' is a mark to adjust before kernel launch when there is enough info:
123-
LocalSize[I] = 0;
124-
GlobalOffset[I] = 0;
125-
NumWorkGroups[I] = N[I];
126-
}
127-
setNDRangeLeftover(Dims_);
128-
Dims = Dims_;
129-
}
130-
131-
template <int Dims_> void setClusterDimensions(sycl::range<Dims_> N) {
132-
if (Dims_ != Dims) {
133-
throw std::runtime_error(
134-
"Dimensionality of cluster, global and local ranges must be same");
135-
}
136-
137-
for (int I = 0; I < Dims_; ++I) {
138-
ClusterDimensions[I] = N[I];
139-
}
140-
}
141-
142-
sycl::range<3> GlobalSize;
143-
sycl::range<3> LocalSize;
144-
sycl::id<3> GlobalOffset;
145-
/// Number of workgroups, used to record the number of workgroups from the
146-
/// simplest form of parallel_for_work_group. If set, all other fields must be
147-
/// zero
148-
sycl::range<3> NumWorkGroups;
149-
sycl::range<3> ClusterDimensions{1, 1, 1};
150-
size_t Dims;
40+
/// Type of the command group.
41+
/// NOTE: Changing the values of any of these enumerators is an API-break.
42+
enum class CGType : unsigned int {
43+
None = 0,
44+
Kernel = 1,
45+
CopyAccToPtr = 2,
46+
CopyPtrToAcc = 3,
47+
CopyAccToAcc = 4,
48+
Barrier = 5,
49+
BarrierWaitlist = 6,
50+
Fill = 7,
51+
UpdateHost = 8,
52+
CopyUSM = 10,
53+
FillUSM = 11,
54+
PrefetchUSM = 12,
55+
CodeplayHostTask = 14,
56+
AdviseUSM = 15,
57+
Copy2DUSM = 16,
58+
Fill2DUSM = 17,
59+
Memset2DUSM = 18,
60+
CopyToDeviceGlobal = 19,
61+
CopyFromDeviceGlobal = 20,
62+
ReadWriteHostPipe = 21,
63+
ExecCommandBuffer = 22,
64+
CopyImage = 23,
65+
SemaphoreWait = 24,
66+
SemaphoreSignal = 25,
67+
ProfilingTag = 26,
15168
};
15269

15370
template <typename, typename T> struct check_fn_signature {
@@ -236,8 +153,6 @@ runKernelWithArg(KernelType KernelName, ArgType Arg) {
236153
// The pure virtual class aimed to store lambda/functors of any type.
237154
class HostKernelBase {
238155
public:
239-
// The method executes lambda stored using NDRange passed.
240-
virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0;
241156
// Return pointer to the lambda object.
242157
// Used to extract captured variables.
243158
virtual char *getPtr() = 0;
@@ -255,177 +170,9 @@ class HostKernel : public HostKernelBase {
255170

256171
public:
257172
HostKernel(KernelType Kernel) : MKernel(Kernel) {}
258-
void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override {
259-
// adjust ND range for serial host:
260-
NDRDescT AdjustedRange = NDRDesc;
261-
262-
if (NDRDesc.GlobalSize[0] == 0 && NDRDesc.NumWorkGroups[0] != 0) {
263-
// This is a special case - NDRange information is not complete, only the
264-
// desired number of work groups is set by the user. Choose work group
265-
// size (LocalSize), calculate the missing NDRange characteristics
266-
// needed to invoke the kernel and adjust the NDRange descriptor
267-
// accordingly. For some devices the work group size selection requires
268-
// access to the device's properties, hence such late "adjustment".
269-
range<3> WGsize{1, 1, 1}; // no better alternative for serial host?
270-
AdjustedRange.set(NDRDesc.Dims,
271-
nd_range<3>(NDRDesc.NumWorkGroups * WGsize, WGsize));
272-
}
273-
// If local size for host is not set explicitly, let's adjust it to 1,
274-
// so an exception for zero local size is not thrown.
275-
if (AdjustedRange.LocalSize[0] == 0)
276-
for (size_t I = 0; I < AdjustedRange.Dims; ++I)
277-
AdjustedRange.LocalSize[I] = 1;
278-
if (HPI)
279-
HPI->start();
280-
runOnHost(AdjustedRange);
281-
if (HPI)
282-
HPI->end();
283-
}
284173

285174
char *getPtr() override { return reinterpret_cast<char *>(&MKernel); }
286175

287-
template <class ArgT = KernelArgType>
288-
typename std::enable_if_t<std::is_same_v<ArgT, void>>
289-
runOnHost(const NDRDescT &) {
290-
runKernelWithoutArg(MKernel);
291-
}
292-
293-
template <class ArgT = KernelArgType>
294-
typename std::enable_if_t<std::is_same_v<ArgT, sycl::id<Dims>>>
295-
runOnHost(const NDRDescT &NDRDesc) {
296-
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
297-
sycl::id<Dims> Offset;
298-
sycl::range<Dims> Stride(
299-
InitializedVal<Dims, range>::template get<1>()); // initialized to 1
300-
sycl::range<Dims> UpperBound(
301-
InitializedVal<Dims, range>::template get<0>());
302-
for (int I = 0; I < Dims; ++I) {
303-
Range[I] = NDRDesc.GlobalSize[I];
304-
Offset[I] = NDRDesc.GlobalOffset[I];
305-
UpperBound[I] = Range[I] + Offset[I];
306-
}
307-
308-
detail::NDLoop<Dims>::iterate(
309-
/*LowerBound=*/Offset, Stride, UpperBound,
310-
[&](const sycl::id<Dims> &ID) {
311-
sycl::item<Dims, /*Offset=*/true> Item =
312-
IDBuilder::createItem<Dims, true>(Range, ID, Offset);
313-
314-
runKernelWithArg<const sycl::id<Dims> &>(MKernel, ID);
315-
});
316-
}
317-
318-
template <class ArgT = KernelArgType>
319-
typename std::enable_if_t<std::is_same_v<ArgT, item<Dims, /*Offset=*/false>>>
320-
runOnHost(const NDRDescT &NDRDesc) {
321-
sycl::id<Dims> ID;
322-
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
323-
for (int I = 0; I < Dims; ++I)
324-
Range[I] = NDRDesc.GlobalSize[I];
325-
326-
detail::NDLoop<Dims>::iterate(Range, [&](const sycl::id<Dims> ID) {
327-
sycl::item<Dims, /*Offset=*/false> Item =
328-
IDBuilder::createItem<Dims, false>(Range, ID);
329-
sycl::item<Dims, /*Offset=*/true> ItemWithOffset = Item;
330-
331-
runKernelWithArg<sycl::item<Dims, /*Offset=*/false>>(MKernel, Item);
332-
});
333-
}
334-
335-
template <class ArgT = KernelArgType>
336-
typename std::enable_if_t<std::is_same_v<ArgT, item<Dims, /*Offset=*/true>>>
337-
runOnHost(const NDRDescT &NDRDesc) {
338-
sycl::range<Dims> Range(InitializedVal<Dims, range>::template get<0>());
339-
sycl::id<Dims> Offset;
340-
sycl::range<Dims> Stride(
341-
InitializedVal<Dims, range>::template get<1>()); // initialized to 1
342-
sycl::range<Dims> UpperBound(
343-
InitializedVal<Dims, range>::template get<0>());
344-
for (int I = 0; I < Dims; ++I) {
345-
Range[I] = NDRDesc.GlobalSize[I];
346-
Offset[I] = NDRDesc.GlobalOffset[I];
347-
UpperBound[I] = Range[I] + Offset[I];
348-
}
349-
350-
detail::NDLoop<Dims>::iterate(
351-
/*LowerBound=*/Offset, Stride, UpperBound,
352-
[&](const sycl::id<Dims> &ID) {
353-
sycl::item<Dims, /*Offset=*/true> Item =
354-
IDBuilder::createItem<Dims, true>(Range, ID, Offset);
355-
356-
runKernelWithArg<sycl::item<Dims, /*Offset=*/true>>(MKernel, Item);
357-
});
358-
}
359-
360-
template <class ArgT = KernelArgType>
361-
typename std::enable_if_t<std::is_same_v<ArgT, nd_item<Dims>>>
362-
runOnHost(const NDRDescT &NDRDesc) {
363-
sycl::range<Dims> GroupSize(InitializedVal<Dims, range>::template get<0>());
364-
for (int I = 0; I < Dims; ++I) {
365-
if (NDRDesc.LocalSize[I] == 0 ||
366-
NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0)
367-
throw sycl::exception(make_error_code(errc::nd_range),
368-
"Invalid local size for global size");
369-
GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
370-
}
371-
372-
sycl::range<Dims> LocalSize(InitializedVal<Dims, range>::template get<0>());
373-
sycl::range<Dims> GlobalSize(
374-
InitializedVal<Dims, range>::template get<0>());
375-
sycl::id<Dims> GlobalOffset;
376-
for (int I = 0; I < Dims; ++I) {
377-
GlobalOffset[I] = NDRDesc.GlobalOffset[I];
378-
LocalSize[I] = NDRDesc.LocalSize[I];
379-
GlobalSize[I] = NDRDesc.GlobalSize[I];
380-
}
381-
382-
detail::NDLoop<Dims>::iterate(GroupSize, [&](const id<Dims> &GroupID) {
383-
sycl::group<Dims> Group = IDBuilder::createGroup<Dims>(
384-
GlobalSize, LocalSize, GroupSize, GroupID);
385-
386-
detail::NDLoop<Dims>::iterate(LocalSize, [&](const id<Dims> &LocalID) {
387-
id<Dims> GlobalID =
388-
GroupID * id<Dims>{LocalSize} + LocalID + GlobalOffset;
389-
const sycl::item<Dims, /*Offset=*/true> GlobalItem =
390-
IDBuilder::createItem<Dims, true>(GlobalSize, GlobalID,
391-
GlobalOffset);
392-
const sycl::item<Dims, /*Offset=*/false> LocalItem =
393-
IDBuilder::createItem<Dims, false>(LocalSize, LocalID);
394-
const sycl::nd_item<Dims> NDItem =
395-
IDBuilder::createNDItem<Dims>(GlobalItem, LocalItem, Group);
396-
397-
runKernelWithArg<const sycl::nd_item<Dims>>(MKernel, NDItem);
398-
});
399-
});
400-
}
401-
402-
template <typename ArgT = KernelArgType>
403-
std::enable_if_t<std::is_same_v<ArgT, sycl::group<Dims>>>
404-
runOnHost(const NDRDescT &NDRDesc) {
405-
sycl::range<Dims> NGroups(InitializedVal<Dims, range>::template get<0>());
406-
407-
for (int I = 0; I < Dims; ++I) {
408-
if (NDRDesc.LocalSize[I] == 0 ||
409-
NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0)
410-
throw sycl::exception(make_error_code(errc::nd_range),
411-
"Invalid local size for global size");
412-
NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I];
413-
}
414-
415-
sycl::range<Dims> LocalSize(InitializedVal<Dims, range>::template get<0>());
416-
sycl::range<Dims> GlobalSize(
417-
InitializedVal<Dims, range>::template get<0>());
418-
for (int I = 0; I < Dims; ++I) {
419-
LocalSize[I] = NDRDesc.LocalSize[I];
420-
GlobalSize[I] = NDRDesc.GlobalSize[I];
421-
}
422-
detail::NDLoop<Dims>::iterate(NGroups, [&](const id<Dims> &GroupID) {
423-
sycl::group<Dims> Group =
424-
IDBuilder::createGroup<Dims>(GlobalSize, LocalSize, NGroups, GroupID);
425-
runKernelWithArg<sycl::group<Dims>>(MKernel, Group);
426-
});
427-
}
428-
429176
~HostKernel() = default;
430177
};
431178

sycl/include/sycl/ext/oneapi/memcpy2d.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
//===----------------------------------------------------------------------===//
88
#pragma once
99

10-
#include <sycl/detail/host_task_impl.hpp>
1110
#include <sycl/handler.hpp>
1211
#include <sycl/queue.hpp>
1312
#include <sycl/usm/usm_enums.hpp>

sycl/include/sycl/group.hpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -673,13 +673,7 @@ template <int Dimensions = 1> class __SYCL_TYPE(group) group {
673673
friend class detail::Builder;
674674
group(const range<Dimensions> &G, const range<Dimensions> &L,
675675
const range<Dimensions> GroupRange, const id<Dimensions> &I)
676-
: globalRange(G), localRange(L), groupRange(GroupRange), index(I) {
677-
// Make sure local range divides global without remainder:
678-
__SYCL_ASSERT(((G % L).size() == 0) &&
679-
"global range is not multiple of local");
680-
__SYCL_ASSERT((((G / L) - GroupRange).size() == 0) &&
681-
"inconsistent group constructor arguments");
682-
}
676+
: globalRange(G), localRange(L), groupRange(GroupRange), index(I) {}
683677
};
684678
} // namespace _V1
685679
} // namespace sycl

0 commit comments

Comments
 (0)