Skip to content

Commit 377b3fa

Browse files
againull authored and pvchupin committed
[SYCL] Enable useful (not random) output from stream
A pool of flush buffers is allocated in local memory. The pool contains space for each work item in the work group. Each work item writes to its own space (its flush buffer), so output from different work items is not mixed. Data is flushed to the global buffer on endl, on flush, or when kernel execution finishes. The global buffer contains all output from the kernel. The offset of a work item's flush buffer in the pool is calculated only once, in the __init method; the call to this method is generated by the frontend. In the current implementation the user must explicitly flush data on the host device: data is not flushed automatically after kernel execution because of a missing feature in the scheduler. Signed-off-by: Artur Gainullin <[email protected]>
1 parent f752698 commit 377b3fa

File tree

11 files changed

+328
-112
lines changed

11 files changed

+328
-112
lines changed

clang/lib/Sema/SemaSYCL.cpp

+17-2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ class Util {
6363
/// sampler class.
6464
static bool isSyclSamplerType(const QualType &Ty);
6565

66+
/// Checks whether given clang type is a full specialization of the SYCL
67+
/// stream class.
68+
static bool isSyclStreamType(const QualType &Ty);
69+
6670
/// Checks whether given clang type is a standard SYCL API class with given
6771
/// name.
6872
/// \param Ty the clang type being checked
@@ -784,7 +788,7 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S,
784788
// All special SYCL objects must have __init method
785789
CXXMethodDecl *InitMethod = getInitMethod(CRD);
786790
assert(InitMethod &&
787-
"The accessor/sampler must have the __init method");
791+
"The accessor/sampler/stream must have the __init method");
788792
unsigned NumParams = InitMethod->getNumParams();
789793
llvm::SmallVector<Expr *, 4> ParamDREs(NumParams);
790794
auto KFP = KernelFuncParam;
@@ -794,7 +798,9 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S,
794798
S.Context, NestedNameSpecifierLoc(), SourceLocation(), *KFP,
795799
false, DeclarationNameInfo(), ParamType, VK_LValue);
796800
}
797-
std::advance(KernelFuncParam, NumParams - 1);
801+
802+
if (NumParams)
803+
std::advance(KernelFuncParam, NumParams - 1);
798804

799805
DeclAccessPair FieldDAP = DeclAccessPair::make(Field, AS_none);
800806
// [kernel_obj or wrapper object].special_obj
@@ -923,6 +929,11 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S,
923929
DeclarationNameInfo(Field->getDeclName(), SourceLocation()),
924930
nullptr, Field->getType(), VK_LValue, OK_Ordinary, NOUR_None);
925931
getExprForWrappedAccessorInit(CRD, Lhs);
932+
if (Util::isSyclStreamType(FieldType)) {
933+
// Generate call to the __init method of the stream class after
934+
// initializing accessors wrapped by this stream object
935+
getExprForSpecialSYCLObj(FieldType, Field, CRD, KernelObjCloneRef);
936+
}
926937
}
927938
} else {
928939
llvm_unreachable("Unsupported field type");
@@ -1732,6 +1743,10 @@ bool Util::isSyclSamplerType(const QualType &Ty) {
17321743
return isSyclType(Ty, "sampler");
17331744
}
17341745

1746+
bool Util::isSyclStreamType(const QualType &Ty) {
1747+
return isSyclType(Ty, "stream");
1748+
}
1749+
17351750
bool Util::isSyclType(const QualType &Ty, StringRef Name, bool Tmpl) {
17361751
Decl::Kind ClassDeclKind =
17371752
Tmpl ? Decl::Kind::ClassTemplateSpecialization : Decl::Kind::CXXRecord;

sycl/include/CL/sycl/accessor.hpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -1111,26 +1111,26 @@ class accessor<DataT, Dimensions, AccessMode, access::target::local,
11111111
return getQualifiedPtr()[Index];
11121112
}
11131113

1114-
template <int Dims = Dimensions,
1115-
typename = detail::enable_if_t<Dims == 0 &&
1116-
AccessMode == access::mode::atomic>>
1117-
operator atomic<DataT, AS>() const {
1114+
template <int Dims = Dimensions>
1115+
operator typename detail::enable_if_t<
1116+
Dims == 0 && AccessMode == access::mode::atomic, atomic<DataT, AS>>()
1117+
const {
11181118
return atomic<DataT, AS>(multi_ptr<DataT, AS>(getQualifiedPtr()));
11191119
}
11201120

1121-
template <int Dims = Dimensions,
1122-
typename = detail::enable_if_t<(Dims > 0) &&
1123-
AccessMode == access::mode::atomic>>
1124-
atomic<DataT, AS> operator[](id<Dimensions> Index) const {
1121+
template <int Dims = Dimensions>
1122+
typename detail::enable_if_t<(Dims > 0) && AccessMode == access::mode::atomic,
1123+
atomic<DataT, AS>>
1124+
operator[](id<Dimensions> Index) const {
11251125
const size_t LinearIndex = getLinearIndex(Index);
11261126
return atomic<DataT, AS>(
11271127
multi_ptr<DataT, AS>(getQualifiedPtr() + LinearIndex));
11281128
}
11291129

1130-
template <int Dims = Dimensions,
1131-
typename = detail::enable_if_t<Dims == 1 &&
1132-
AccessMode == access::mode::atomic>>
1133-
atomic<DataT, AS> operator[](size_t Index) const {
1130+
template <int Dims = Dimensions>
1131+
typename detail::enable_if_t<Dims == 1 && AccessMode == access::mode::atomic,
1132+
atomic<DataT, AS>>
1133+
operator[](size_t Index) const {
11341134
return atomic<DataT, AS>(multi_ptr<DataT, AS>(getQualifiedPtr() + Index));
11351135
}
11361136

sycl/include/CL/sycl/detail/accessor_impl.hpp

+17
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,23 @@ class LocalAccessorImplHost {
141141
int MDims;
142142
int MElemSize;
143143
std::vector<char> MMem;
144+
145+
bool PerWI = false;
146+
size_t LocalMemSize;
147+
size_t MaxWGSize;
148+
void resize(size_t LocalSize, size_t GlobalSize) {
149+
if (GlobalSize != 1 && LocalSize != 1) {
150+
// If local size is not specified then work group size is chosen by
151+
// runtime. That is why try to allocate based on max work group size or
152+
// global size. In the worst case allocate 80% of local memory.
153+
size_t MinEstWGSize = LocalSize ? LocalSize : GlobalSize;
154+
MinEstWGSize = MinEstWGSize > MaxWGSize ? MaxWGSize : MinEstWGSize;
155+
size_t NewSize = MinEstWGSize * MSize[0];
156+
MSize[0] =
157+
NewSize > 8 * LocalMemSize / 10 ? 8 * LocalMemSize / 10 : NewSize;
158+
MMem.resize(NewSize * MElemSize);
159+
}
160+
}
144161
};
145162

146163
class LocalAccessorBaseHost {

0 commit comments

Comments
 (0)