Skip to content

Commit e9b9d45

Browse files
authored
[SYCL] Defer shadow copy creation in SYCLMemObjT (#11348)
#10334 causes a performance regression since `HostPtr` can't be reused when it's read-only. This PR fixes the regression by deferring the copy operation to the creation of a writable accessor. It includes following the changes: - A new `SYCLMemObjT::MCreateShadowCopy` to defer allocation. When the `HostPtr` cannot be reused since it's read-only, `SYCLMemObjT::handleHostData` sets this member to a function that will allocate the shadow copy. - A new `SYCLMemObjT::handleWriteAccessorCreation` member function. This function calls `SYCLMemObjT::MCreateShadowCopy` and updates any existing `MAllocaCommands` if `MUserPtr` changed. - Whenever a writable host or device accessor is created, `handleWriteAccessorCreation` gets called to ensure that any required memory allocation occurs. With this change, the allocation and copying overhead occurs during the creation of the first writable accessor. There's no overhead if all of the relevant accessors use `sycl::access_mode::read`. --------- Signed-off-by: Michael Aziz <[email protected]>
1 parent 95da7be commit e9b9d45

File tree

7 files changed

+96
-4
lines changed

7 files changed

+96
-4
lines changed

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,11 @@ Command *
521521
Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
522522
std::vector<Command *> &ToEnqueue) {
523523

524+
if (Req->MAccessMode != sycl::access_mode::read) {
525+
auto SYCLMemObj = static_cast<detail::SYCLMemObjT *>(Req->MSYCLMemObj);
526+
SYCLMemObj->handleWriteAccessorCreation();
527+
}
528+
524529
const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue();
525530

526531
MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue);

sycl/source/detail/sycl_mem_obj_t.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,19 @@ void SYCLMemObjT::detachMemoryObject(
233233
Scheduler::getInstance().deferMemObjRelease(Self);
234234
}
235235

236+
void SYCLMemObjT::handleWriteAccessorCreation() {
237+
const auto InitialUserPtr = MUserPtr;
238+
MCreateShadowCopy();
239+
MCreateShadowCopy = []() -> void {};
240+
if (MRecord != nullptr && MUserPtr != InitialUserPtr) {
241+
for (auto &it : MRecord->MAllocaCommands) {
242+
if (it->MMemAllocation == InitialUserPtr) {
243+
it->MMemAllocation = MUserPtr;
244+
}
245+
}
246+
}
247+
}
248+
236249
} // namespace detail
237250
} // namespace _V1
238251
} // namespace sycl

sycl/source/detail/sycl_mem_obj_t.hpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,14 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
173173
has_property<property::image::use_host_ptr>();
174174
}
175175

176-
bool canReuseHostPtr(void *HostPtr, const size_t RequiredAlign) {
176+
bool canReadHostPtr(void *HostPtr, const size_t RequiredAlign) {
177177
bool Aligned =
178178
(reinterpret_cast<std::uintptr_t>(HostPtr) % RequiredAlign) == 0;
179-
return !MHostPtrReadOnly && (Aligned || useHostPtr());
179+
return Aligned || useHostPtr();
180+
}
181+
182+
bool canReuseHostPtr(void *HostPtr, const size_t RequiredAlign) {
183+
return !MHostPtrReadOnly && canReadHostPtr(HostPtr, RequiredAlign);
180184
}
181185

182186
void handleHostData(void *HostPtr, const size_t RequiredAlign) {
@@ -190,6 +194,14 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
190194
if (HostPtr) {
191195
if (canReuseHostPtr(HostPtr, RequiredAlign)) {
192196
MUserPtr = HostPtr;
197+
} else if (canReadHostPtr(HostPtr, RequiredAlign)) {
198+
MUserPtr = HostPtr;
199+
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
200+
setAlign(RequiredAlign);
201+
MShadowCopy = allocateHostMem();
202+
MUserPtr = MShadowCopy;
203+
std::memcpy(MUserPtr, HostPtr, MSizeInBytes);
204+
};
193205
} else {
194206
setAlign(RequiredAlign);
195207
MShadowCopy = allocateHostMem();
@@ -213,9 +225,17 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
213225
if (!MHostPtrReadOnly)
214226
set_final_data_from_storage();
215227

216-
if (canReuseHostPtr(HostPtr.get(), RequiredAlign))
228+
if (canReuseHostPtr(HostPtr.get(), RequiredAlign)) {
229+
MUserPtr = HostPtr.get();
230+
} else if (canReadHostPtr(HostPtr.get(), RequiredAlign)) {
217231
MUserPtr = HostPtr.get();
218-
else {
232+
MCreateShadowCopy = [this, RequiredAlign, HostPtr]() -> void {
233+
setAlign(RequiredAlign);
234+
MShadowCopy = allocateHostMem();
235+
MUserPtr = MShadowCopy;
236+
std::memcpy(MUserPtr, HostPtr.get(), MSizeInBytes);
237+
};
238+
} else {
219239
setAlign(RequiredAlign);
220240
MShadowCopy = allocateHostMem();
221241
MUserPtr = MShadowCopy;
@@ -248,6 +268,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
248268
static size_t getBufSizeForContext(const ContextImplPtr &Context,
249269
pi_native_handle MemObject);
250270

271+
void handleWriteAccessorCreation();
272+
251273
void *allocateMem(ContextImplPtr Context, bool InitFromUserData,
252274
void *HostPtr,
253275
sycl::detail::pi::PiEvent &InteropEvent) override {
@@ -349,6 +371,10 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI {
349371
bool MIsInternal = false;
350372
// The number of graphs which are currently using this memory object.
351373
std::atomic<size_t> MGraphUseCount = 0;
374+
// Function which creates a shadow copy of the host pointer. This is used to
375+
// defer the memory allocation and copying to the point where a writable
376+
// accessor is created.
377+
std::function<void(void)> MCreateShadowCopy = []() -> void {};
352378
bool MOwnNativeHandle = true;
353379
};
354380
} // namespace detail

sycl/source/handler.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,10 @@ void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl,
527527
"are not allowed to be used in command graphs.");
528528
}
529529
detail::Requirement *Req = AccImpl.get();
530+
if (Req->MAccessMode != sycl::access_mode::read) {
531+
auto SYCLMemObj = static_cast<detail::SYCLMemObjT *>(Req->MSYCLMemObj);
532+
SYCLMemObj->handleWriteAccessorCreation();
533+
}
530534
// Add accessor to the list of requirements.
531535
if (Req->MAccessRange.size() != 0)
532536
CGData.MRequirements.push_back(Req);
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
#include <cstdlib>
5+
#include <iostream>
6+
#include <sycl/sycl.hpp>
7+
8+
constexpr int N = 10 * 1024 * 1024;
9+
10+
int main() {
11+
std::vector<int> vec(N, 1);
12+
const int *const host_address = &vec[0];
13+
{
14+
// Create a buffer with a read-only hostData pointer.
15+
sycl::buffer<int, 1> buf(static_cast<const int *>(vec.data()),
16+
sycl::range<1>{N});
17+
18+
// Assert that the hostData pointer is being reused.
19+
{
20+
sycl::host_accessor<int, 1, sycl::access_mode::read> r_acc{buf};
21+
assert(&r_acc[0] == host_address && "hostData was copied");
22+
}
23+
24+
// Assert that creating a writeable accessor copies the data and the
25+
// hostData pointer is not being reused.
26+
{
27+
sycl::host_accessor<int, 1, sycl::access_mode::write> rw_acc{buf};
28+
assert(&rw_acc[0] != host_address &&
29+
"writable accessor references read-only hostData");
30+
31+
rw_acc[0] = 0;
32+
assert(rw_acc[0] == 0 && "failed to write to accessor");
33+
}
34+
}
35+
36+
// Assert that the vector was never modified (since hostData is read-only).
37+
assert(vec[0] == 1 && "read-only hostData was modified");
38+
39+
std::cout << "Test passed!" << std::endl;
40+
return EXIT_SUCCESS;
41+
}

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3810,6 +3810,7 @@ _ZN4sycl3_V16detail11SYCLMemObjT16determineHostPtrERKSt10shared_ptrINS1_12contex
38103810
_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEPv
38113811
_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEv
38123812
_ZN4sycl3_V16detail11SYCLMemObjT20getBufSizeForContextERKSt10shared_ptrINS1_12context_implEEm
3813+
_ZN4sycl3_V16detail11SYCLMemObjT27handleWriteAccessorCreationEv
38133814
_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE
38143815
_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm
38153816
_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,7 @@
933933
?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEAVqueue@67@@Z
934934
?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA_NAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z
935935
?build_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z
936+
?canReadHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NPEAX_K@Z
936937
?canReuseHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NPEAX_K@Z
937938
?cancel_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ
938939
?category@exception@_V1@sycl@@QEBAAEBVerror_category@std@@XZ
@@ -1289,6 +1290,7 @@
12891290
?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXPEAX_K@Z
12901291
?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXPEBX_K@Z
12911292
?handleRelease@buffer_plain@detail@_V1@sycl@@IEBAXXZ
1293+
?handleWriteAccessorCreation@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ
12921294
?has@device@_V1@sycl@@QEBA_NW4aspect@23@@Z
12931295
?has@platform@_V1@sycl@@QEBA_NW4aspect@23@@Z
12941296
?hasUserDataPtr@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ

0 commit comments

Comments
 (0)