Skip to content

Commit d45acd8

Browse files
committed
[SYCL] Enable useful (not random) output from stream
A pool of flush buffers is allocated in local memory. This pool contains space for each work item in the work group. Each work item writes to its own space (flush buffer); as a result, output from different work items is not mixed. Data is flushed to the global buffer on endl, on flush, or when kernel execution is finished. The global buffer contains all output from the kernel. The offset of a work item's (WI's) flush buffer in the pool is calculated only once, in the __init method. The call to this method is generated by the frontend. In the current implementation, the user should explicitly flush data on the host device: data is not flushed automatically after kernel execution because of a missing feature in the scheduler. Signed-off-by: Artur Gainullin <[email protected]>
1 parent faecc73 commit d45acd8

File tree

11 files changed

+328
-112
lines changed

11 files changed

+328
-112
lines changed

clang/lib/Sema/SemaSYCL.cpp

+17-2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ class Util {
6363
/// sampler class.
6464
static bool isSyclSamplerType(const QualType &Ty);
6565

66+
/// Checks whether given clang type is a full specialization of the SYCL
67+
/// stream class.
68+
static bool isSyclStreamType(const QualType &Ty);
69+
6670
/// Checks whether given clang type is a standard SYCL API class with given
6771
/// name.
6872
/// \param Ty the clang type being checked
@@ -770,7 +774,7 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S,
770774
// All special SYCL objects must have __init method
771775
CXXMethodDecl *InitMethod = getInitMethod(CRD);
772776
assert(InitMethod &&
773-
"The accessor/sampler must have the __init method");
777+
"The accessor/sampler/stream must have the __init method");
774778
unsigned NumParams = InitMethod->getNumParams();
775779
llvm::SmallVector<Expr *, 4> ParamDREs(NumParams);
776780
auto KFP = KernelFuncParam;
@@ -780,7 +784,9 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S,
780784
S.Context, NestedNameSpecifierLoc(), SourceLocation(), *KFP,
781785
false, DeclarationNameInfo(), ParamType, VK_LValue);
782786
}
783-
std::advance(KernelFuncParam, NumParams - 1);
787+
788+
if (NumParams)
789+
std::advance(KernelFuncParam, NumParams - 1);
784790

785791
DeclAccessPair FieldDAP = DeclAccessPair::make(Field, AS_none);
786792
// [kernel_obj or wrapper object].special_obj
@@ -909,6 +915,11 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S,
909915
DeclarationNameInfo(Field->getDeclName(), SourceLocation()),
910916
nullptr, Field->getType(), VK_LValue, OK_Ordinary, NOUR_None);
911917
getExprForWrappedAccessorInit(CRD, Lhs);
918+
if (Util::isSyclStreamType(FieldType)) {
919+
// Generate call to the __init method of the stream class after
920+
// initializing accessors wrapped by this stream object
921+
getExprForSpecialSYCLObj(FieldType, Field, CRD, KernelObjCloneRef);
922+
}
912923
}
913924
} else {
914925
llvm_unreachable("Unsupported field type");
@@ -1714,6 +1725,10 @@ bool Util::isSyclSamplerType(const QualType &Ty) {
17141725
return isSyclType(Ty, "sampler");
17151726
}
17161727

1728+
bool Util::isSyclStreamType(const QualType &Ty) {
1729+
return isSyclType(Ty, "stream");
1730+
}
1731+
17171732
bool Util::isSyclType(const QualType &Ty, StringRef Name, bool Tmpl) {
17181733
Decl::Kind ClassDeclKind =
17191734
Tmpl ? Decl::Kind::ClassTemplateSpecialization : Decl::Kind::CXXRecord;

sycl/include/CL/sycl/accessor.hpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -1111,26 +1111,26 @@ class accessor<DataT, Dimensions, AccessMode, access::target::local,
11111111
return getQualifiedPtr()[Index];
11121112
}
11131113

1114-
template <int Dims = Dimensions,
1115-
typename = detail::enable_if_t<Dims == 0 &&
1116-
AccessMode == access::mode::atomic>>
1117-
operator atomic<DataT, AS>() const {
1114+
template <int Dims = Dimensions>
1115+
operator typename detail::enable_if_t<
1116+
Dims == 0 && AccessMode == access::mode::atomic, atomic<DataT, AS>>()
1117+
const {
11181118
return atomic<DataT, AS>(multi_ptr<DataT, AS>(getQualifiedPtr()));
11191119
}
11201120

1121-
template <int Dims = Dimensions,
1122-
typename = detail::enable_if_t<(Dims > 0) &&
1123-
AccessMode == access::mode::atomic>>
1124-
atomic<DataT, AS> operator[](id<Dimensions> Index) const {
1121+
template <int Dims = Dimensions>
1122+
typename detail::enable_if_t<(Dims > 0) && AccessMode == access::mode::atomic,
1123+
atomic<DataT, AS>>
1124+
operator[](id<Dimensions> Index) const {
11251125
const size_t LinearIndex = getLinearIndex(Index);
11261126
return atomic<DataT, AS>(
11271127
multi_ptr<DataT, AS>(getQualifiedPtr() + LinearIndex));
11281128
}
11291129

1130-
template <int Dims = Dimensions,
1131-
typename = detail::enable_if_t<Dims == 1 &&
1132-
AccessMode == access::mode::atomic>>
1133-
atomic<DataT, AS> operator[](size_t Index) const {
1130+
template <int Dims = Dimensions>
1131+
typename detail::enable_if_t<Dims == 1 && AccessMode == access::mode::atomic,
1132+
atomic<DataT, AS>>
1133+
operator[](size_t Index) const {
11341134
return atomic<DataT, AS>(multi_ptr<DataT, AS>(getQualifiedPtr() + Index));
11351135
}
11361136

sycl/include/CL/sycl/detail/accessor_impl.hpp

+17
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,23 @@ class LocalAccessorImplHost {
141141
int MDims;
142142
int MElemSize;
143143
std::vector<char> MMem;
144+
145+
bool PerWI = false;
146+
size_t LocalMemSize;
147+
size_t MaxWGSize;
148+
void resize(size_t LocalSize, size_t GlobalSize) {
149+
if (GlobalSize != 1 && LocalSize != 1) {
150+
// If local size is not specified then work group size is chosen by
151+
// runtime. That is why try to allocate based on max work group size or
152+
// global size. In the worst case allocate 80% of local memory.
153+
size_t MinEstWGSize = LocalSize ? LocalSize : GlobalSize;
154+
MinEstWGSize = MinEstWGSize > MaxWGSize ? MaxWGSize : MinEstWGSize;
155+
size_t NewSize = MinEstWGSize * MSize[0];
156+
MSize[0] =
157+
NewSize > 8 * LocalMemSize / 10 ? 8 * LocalMemSize / 10 : NewSize;
158+
MMem.resize(NewSize * MElemSize);
159+
}
160+
}
144161
};
145162

146163
class LocalAccessorBaseHost {

0 commit comments

Comments
 (0)