From 97e690cb8916745f28c8731a863a29a1635a3e46 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Sep 2020 12:10:17 +0300 Subject: [PATCH 1/9] [SYCL] Improve testing of host-task Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 192 +++++++++++++++++++++- 1 file changed, 183 insertions(+), 9 deletions(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index 3f981058c02bb..cecd1958d7df9 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -1,20 +1,42 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib -lOpenCL -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %clangxx -std=c++14 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out 1 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 1 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 1 + +// RUN: %CPU_RUN_PLACEHOLDER %t.out 2 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 2 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 2 + +// RUN: %CPU_RUN_PLACEHOLDER %t.out 3 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 3 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 3 + +// RUN: %CPU_RUN_PLACEHOLDER %t.out 4 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 4 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 4 #include +#include +#include +#include +#include using namespace cl::sycl; using namespace cl::sycl::access; static constexpr size_t BUFFER_SIZE = 1024; +static auto EH = [](exception_list EL) { + for (const std::exception_ptr &E : EL) { + throw E; + } +}; + // Check that a single host-task with a buffer will work void test1() { buffer Buffer{BUFFER_SIZE}; - queue Q; + queue Q(EH); Q.submit([&](handler &CGH) { auto Acc = Buffer.get_access(CGH); @@ -22,13 +44,16 @@ void test1() { // A no-op }); }); + + Q.wait_and_throw(); } +// Check that a host task after the kernel (deps via buffer) will work void test2() { buffer Buffer1{BUFFER_SIZE}; buffer Buffer2{BUFFER_SIZE}; - queue Q; + queue Q(EH); Q.submit([&](handler &CGH) { auto Acc = Buffer1.template get_access(CGH); @@ -55,10 +80,159 @@ void test2() { assert(Acc[Idx] == 123); } } + + Q.wait_and_throw(); } -int main() { - test1(); - test2(); +// Host-task depending on another host-task via both buffers and +// handler::depends_on() should not hang +void test3() { + queue Q(EH); + + static constexpr size_t BufferSize = 10 * 1024; + + buffer B0{range<1>{BufferSize}}; + buffer B1{range<1>{BufferSize}}; + buffer B2{range<1>{BufferSize}}; + buffer B3{range<1>{BufferSize}}; + buffer B4{range<1>{BufferSize}}; + buffer B5{range<1>{BufferSize}}; + buffer B6{range<1>{BufferSize}}; + buffer B7{range<1>{BufferSize}}; + buffer B8{range<1>{BufferSize}}; + buffer B9{range<1>{BufferSize}}; + + std::vector Deps; + + for (size_t Idx = 0; Idx < 10; ++Idx) { + event E = Q.submit([&](handler &CGH) { + CGH.depends_on(Deps); + + std::cout << "Submit: " << Idx << std::endl; + + auto Acc0 = B0.get_access(CGH); + auto Acc1 = B1.get_access(CGH); + auto Acc2 = B2.get_access(CGH); + auto Acc3 = B3.get_access(CGH); + auto Acc4 = B4.get_access(CGH); + auto Acc5 = B5.get_access(CGH); + auto Acc6 = B6.get_access(CGH); + auto Acc7 = B7.get_access(CGH); + auto Acc8 = B8.get_access(CGH); + auto Acc9 = B9.get_access(CGH); + + CGH.codeplay_host_task([=] { + uint64_t X = 0; + + X ^= reinterpret_cast(&Acc0[Idx + 0]); + X ^= reinterpret_cast(&Acc1[Idx + 1]); + X ^= reinterpret_cast(&Acc2[Idx + 2]); + X ^= reinterpret_cast(&Acc3[Idx + 3]); + X ^= reinterpret_cast(&Acc4[Idx + 4]); + X ^= reinterpret_cast(&Acc5[Idx + 5]); + X ^= reinterpret_cast(&Acc6[Idx + 6]); + X ^= reinterpret_cast(&Acc7[Idx + 7]); + X ^= reinterpret_cast(&Acc8[Idx + 8]); + X ^= reinterpret_cast(&Acc9[Idx + 9]); + + std::cout << " Start " << Idx << " (" << X << ")" << std::endl; + using namespace std::chrono_literals; + std::this_thread::sleep_for(100ms); + std::cout << " End " << Idx << std::endl; + }); + }); + + Deps = {E}; + } + + Q.wait_and_throw(); +} + +// Host-task depending on another host-task via handler::depends_on() only +// should not hang +void test4() { + queue Q(EH); + + static constexpr size_t BufferSize = 10 * 1024; + + buffer B0{range<1>{BufferSize}}; + buffer B1{range<1>{BufferSize}}; + buffer B2{range<1>{BufferSize}}; + buffer B3{range<1>{BufferSize}}; + buffer B4{range<1>{BufferSize}}; + buffer B5{range<1>{BufferSize}}; + + // This host task should be submitted without hesitation + event E1 = Q.submit([&](handler &CGH) { + std::cout << "Submit 1" << std::endl; + + auto Acc0 = B0.get_access(CGH); + auto Acc1 = B1.get_access(CGH); + auto Acc2 = B2.get_access(CGH); + + CGH.codeplay_host_task([=]{ + Acc0[0] = 1; + Acc1[0] = 2; + Acc2[0] = 3; + }); + }); + + // This host task is going to depend on blocked empty node of the first + // host-task (via buffer #2). Still this one should be enqueued. + event E2 = Q.submit([&](handler &CGH) { + std::cout << "Submit 2" << std::endl; + + auto Acc2 = B2.get_access(CGH); + auto Acc3 = B3.get_access(CGH); + + CGH.codeplay_host_task([=] { + Acc2[1] = 1; + Acc3[1] = 2; + }); + }); + + // This host-task only depends on the second host-task via + // handler::depends_on(). This one should not hang and should be enqueued + // after host-task #2. + event E3 = Q.submit([&](handler &CGH) { + CGH.depends_on(E2); + + std::cout << "Submit 3" << std::endl; + + auto Acc4 = B4.get_access(CGH); + auto Acc5 = B5.get_access(CGH); + + CGH.codeplay_host_task([=] { + Acc4[2] = 1; + Acc5[2] = 2; + }); + }); + + Q.wait_and_throw(); +} + +int main(int Argc, const char *Argv[]) { + if (Argc < 2) + return 1; + + int TestIdx = std::stoi(Argv[1]); + + switch (TestIdx) { + case 1: + test1(); + break; + case 2: + test2(); + break; + case 3: + test3(); + break; + case 4: + test4(); + break; + default: + return 1; + } + return 0; } From 14e951710dd1537677551d9fe54ff583e09a1471 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Sep 2020 12:10:54 +0300 Subject: [PATCH 2/9] [SYCL] Disable test-case as it hangs now Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index cecd1958d7df9..026c5a821a1b0 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -11,9 +11,9 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out 3 // RUN: %ACC_RUN_PLACEHOLDER %t.out 3 -// RUN: %CPU_RUN_PLACEHOLDER %t.out 4 -// RUN: %GPU_RUN_PLACEHOLDER %t.out 4 -// RUN: %ACC_RUN_PLACEHOLDER %t.out 4 +// RUNx: %CPU_RUN_PLACEHOLDER %t.out 4 +// RUNx: %GPU_RUN_PLACEHOLDER %t.out 4 +// RUNx: %ACC_RUN_PLACEHOLDER %t.out 4 #include #include From 299797bcdd00c00cb94d1ed18758dfad4d986699 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Sep 2020 12:17:30 +0300 Subject: [PATCH 3/9] Address style issues Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 32 +++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index 026c5a821a1b0..d91ef2e9f9461 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include using namespace cl::sycl; using namespace cl::sycl::access; @@ -170,7 +170,7 @@ void test4() { auto Acc1 = B1.get_access(CGH); auto Acc2 = B2.get_access(CGH); - CGH.codeplay_host_task([=]{ + CGH.codeplay_host_task([=] { Acc0[0] = 1; Acc1[0] = 2; Acc2[0] = 3; @@ -218,20 +218,20 @@ int main(int Argc, const char *Argv[]) { int TestIdx = std::stoi(Argv[1]); switch (TestIdx) { - case 1: - test1(); - break; - case 2: - test2(); - break; - case 3: - test3(); - break; - case 4: - test4(); - break; - default: - return 1; + case 1: + test1(); + break; + case 2: + test2(); + break; + case 3: + test3(); + break; + case 4: + test4(); + break; + default: + return 1; } return 0; From e7797f4b021a0453a3b296a398623ae79d81d3f2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Sep 2020 17:29:37 +0300 Subject: [PATCH 4/9] [SYCL] Improve testing of host-task Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index d91ef2e9f9461..ccae5c8b6607a 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -104,7 +104,11 @@ void test3() { std::vector Deps; - for (size_t Idx = 0; Idx < 10; ++Idx) { + using namespace std::chrono_literals; + static constexpr size_t Count = 10; + + auto Start = std::chrono::steady_clock::now(); + for (size_t Idx = 0; Idx < Count; ++Idx) { event E = Q.submit([&](handler &CGH) { CGH.depends_on(Deps); @@ -134,11 +138,6 @@ void test3() { X ^= reinterpret_cast(&Acc7[Idx + 7]); X ^= reinterpret_cast(&Acc8[Idx + 8]); X ^= reinterpret_cast(&Acc9[Idx + 9]); - - std::cout << " Start " << Idx << " (" << X << ")" << std::endl; - using namespace std::chrono_literals; - std::this_thread::sleep_for(100ms); - std::cout << " End " << Idx << std::endl; }); }); @@ -146,6 +145,11 @@ void test3() { } Q.wait_and_throw(); + auto End = std::chrono::steady_clock::now(); + + constexpr auto Threshold = 2s; + + assert(End - Start < Threshold && "Host tasks were waiting for too long"); } // Host-task depending on another host-task via handler::depends_on() only From f0fce8fff3bee656171dfb4a51a0ec6404feaca1 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Sep 2020 17:36:46 +0300 Subject: [PATCH 5/9] [SYCL] Remove unwanted include Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index ccae5c8b6607a..9afb14d3f359b 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include using namespace cl::sycl; From c94a24559f947b71d9dfd2ee7d1d52e50825a9e6 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 25 Sep 2020 17:39:22 +0300 Subject: [PATCH 6/9] [SYCL] Use default C++ std Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index 9afb14d3f359b..ca355bcb4b654 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -std=c++14 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out 1 // RUN: %GPU_RUN_PLACEHOLDER %t.out 1 // RUN: %ACC_RUN_PLACEHOLDER %t.out 1 From 8666752831366e86cbebda3ad4d84f5cdce8b439 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 28 Sep 2020 17:36:13 +0300 Subject: [PATCH 7/9] [SYCL] Improve testing of host task Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 191 ++++++++++++++++++---- 1 file changed, 156 insertions(+), 35 deletions(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index ca355bcb4b654..aba34f069d369 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -15,9 +15,22 @@ // RUNx: %GPU_RUN_PLACEHOLDER %t.out 4 // RUNx: %ACC_RUN_PLACEHOLDER %t.out 4 +// RUNx: %CPU_RUN_PLACEHOLDER %t.out 5 +// RUNx: %GPU_RUN_PLACEHOLDER %t.out 5 +// RUNx: %ACC_RUN_PLACEHOLDER %t.out 5 + +// RUNx: %CPU_RUN_PLACEHOLDER %t.out 6 +// RUNx: %GPU_RUN_PLACEHOLDER %t.out 6 +// RUNx: %ACC_RUN_PLACEHOLDER %t.out 6 + +// RUNx: %CPU_RUN_PLACEHOLDER %t.out 7 +// RUNx: %GPU_RUN_PLACEHOLDER %t.out 7 +// RUNx: %ACC_RUN_PLACEHOLDER %t.out 7 + #include #include #include +#include #include using namespace cl::sycl; @@ -103,7 +116,6 @@ void test3() { std::vector Deps; - using namespace std::chrono_literals; static constexpr size_t Count = 10; auto Start = std::chrono::steady_clock::now(); @@ -146,6 +158,7 @@ void test3() { Q.wait_and_throw(); auto End = std::chrono::steady_clock::now(); + using namespace std::chrono_literals; constexpr auto Threshold = 2s; assert(End - Start < Threshold && "Host tasks were waiting for too long"); @@ -153,7 +166,7 @@ void test3() { // Host-task depending on another host-task via handler::depends_on() only // should not hang -void test4() { +void test4(size_t Count = 1) { queue Q(EH); static constexpr size_t BufferSize = 10 * 1024; @@ -165,51 +178,150 @@ void test4() { buffer B4{range<1>{BufferSize}}; buffer B5{range<1>{BufferSize}}; - // This host task should be submitted without hesitation - event E1 = Q.submit([&](handler &CGH) { - std::cout << "Submit 1" << std::endl; + for (size_t Idx = 1; Idx <= Count; ++Idx) { + // This host task should be submitted without hesitation + event E1 = Q.submit([&](handler &CGH) { + std::cout << "Submit 1" << std::endl; - auto Acc0 = B0.get_access(CGH); - auto Acc1 = B1.get_access(CGH); - auto Acc2 = B2.get_access(CGH); + auto Acc0 = B0.get_access(CGH); + auto Acc1 = B1.get_access(CGH); + auto Acc2 = B2.get_access(CGH); - CGH.codeplay_host_task([=] { - Acc0[0] = 1; - Acc1[0] = 2; - Acc2[0] = 3; + CGH.codeplay_host_task([=] { + Acc0[0] = 1 * Idx; + Acc1[0] = 2 * Idx; + Acc2[0] = 3 * Idx; + }); }); - }); - // This host task is going to depend on blocked empty node of the first - // host-task (via buffer #2). Still this one should be enqueued. - event E2 = Q.submit([&](handler &CGH) { - std::cout << "Submit 2" << std::endl; + // This host task is going to depend on blocked empty node of the first + // host-task (via buffer #2). Still this one should be enqueued. + event E2 = Q.submit([&](handler &CGH) { + std::cout << "Submit 2" << std::endl; - auto Acc2 = B2.get_access(CGH); - auto Acc3 = B3.get_access(CGH); + auto Acc2 = B2.get_access(CGH); + auto Acc3 = B3.get_access(CGH); - CGH.codeplay_host_task([=] { - Acc2[1] = 1; - Acc3[1] = 2; + CGH.codeplay_host_task([=] { + Acc2[1] = 1 * Idx; + Acc3[1] = 2 * Idx; + }); }); - }); - // This host-task only depends on the second host-task via - // handler::depends_on(). This one should not hang and should be enqueued - // after host-task #2. - event E3 = Q.submit([&](handler &CGH) { - CGH.depends_on(E2); + // This host-task only depends on the second host-task via + // handler::depends_on(). This one should not hang and should be eexecuted + // after host-task #2. + event E3 = Q.submit([&](handler &CGH) { + CGH.depends_on(E2); - std::cout << "Submit 3" << std::endl; + std::cout << "Submit 3" << std::endl; - auto Acc4 = B4.get_access(CGH); - auto Acc5 = B5.get_access(CGH); + auto Acc4 = B4.get_access(CGH); + auto Acc5 = B5.get_access(CGH); - CGH.codeplay_host_task([=] { - Acc4[2] = 1; - Acc5[2] = 2; + CGH.codeplay_host_task([=] { + Acc4[2] = 1 * Idx; + Acc5[2] = 2 * Idx; + }); }); - }); + } + + Q.wait_and_throw(); +} + +// Host-task depending on another host-task via handler::depends_on() only +// should not hang. A bit more complicated case with kernels depending on +// host-task being involved. +void test5(size_t Count = 1) { + queue Q(EH); + + static constexpr size_t BufferSize = 10 * 1024; + + buffer B0{range<1>{BufferSize}}; + buffer B1{range<1>{BufferSize}}; + buffer B2{range<1>{BufferSize}}; + buffer B3{range<1>{BufferSize}}; + buffer B4{range<1>{BufferSize}}; + buffer B5{range<1>{BufferSize}}; + + using namespace std::chrono_literals; + + for (size_t Idx = 1; Idx <= Count; ++Idx) { + // This host task should be submitted without hesitation + Q.submit([&](handler &CGH) { + std::cout << "Submit HT-1" << std::endl; + + auto Acc0 = B0.get_access(CGH); + + CGH.codeplay_host_task([=] { + std::this_thread::sleep_for(2s); + Acc0[0] = 1 * Idx; + }); + }); + + Q.submit([&](handler &CGH) { + std::cout << "Submit Kernel-1" << std::endl; + + auto Acc0 = B0.get_access(CGH); + + CGH.single_task([=] { + Acc0[1] = 1 * Idx; + }); + }); + + Q.submit([&](handler &CGH) { + std::cout << "Submit Kernel-2" << std::endl; + + auto Acc1 = B1.get_access(CGH); + + CGH.single_task([=] { + Acc1[2] = 1 * Idx; + }); + }); + + Q.submit([&](handler &CGH) { + std::cout << "Submit HT-2" << std::endl; + + auto Acc2 = B2.get_access(CGH); + + CGH.codeplay_host_task([=] { + std::this_thread::sleep_for(2s); + Acc2[3] = 1 * Idx; + }); + }); + + // This host task is going to depend on blocked empty node of the second + // host-task (via buffer #0). Still this one should be enqueued. + event EHT3 = Q.submit([&](handler &CGH) { + std::cout << "Submit HT-3" << std::endl; + + auto Acc0 = B0.get_access(CGH); + auto Acc1 = B1.get_access(CGH); + auto Acc2 = B2.get_access(CGH); + + CGH.codeplay_host_task([=] { + std::this_thread::sleep_for(2s); + Acc0[4] = 1 * Idx; + Acc1[4] = 2 * Idx; + Acc2[4] = 3 * IDx; + }); + }); + + // This host-task only depends on the third host-task via + // handler::depends_on(). This one should not hang and should be executed + // after host-task #3. + Q.submit([&](handler &CGH) { + std::cout << "Submit HT-4" << std::endl; + + CGH.depends_on(EHT3); + + auto Acc5 = B5.get_access(CGH); + + CGH.codeplay_host_task([=] { + Acc5[5] = 1 * IDx; + }); + }); + } Q.wait_and_throw(); } @@ -233,6 +345,15 @@ int main(int Argc, const char *Argv[]) { case 4: test4(); break; + case 5: + test5(); + break; + case 6: + test4(10); + break; + case 7: + test5(10); + break; default: return 1; } From d291557ed08cb456c527ef2c3721ceb31328f6de Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 28 Sep 2020 17:51:33 +0300 Subject: [PATCH 8/9] Fix syntax issue Signed-off-by: Sergey Kanaev --- sycl/test/host-interop-task/host-task.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index aba34f069d369..dc05e98469fb0 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -303,7 +303,7 @@ void test5(size_t Count = 1) { std::this_thread::sleep_for(2s); Acc0[4] = 1 * Idx; Acc1[4] = 2 * Idx; - Acc2[4] = 3 * IDx; + Acc2[4] = 3 * Idx; }); }); @@ -318,7 +318,7 @@ void test5(size_t Count = 1) { auto Acc5 = B5.get_access(CGH); CGH.codeplay_host_task([=] { - Acc5[5] = 1 * IDx; + Acc5[5] = 1 * Idx; }); }); } From 17e01d105a69b2f02693a8635541972920053a7c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 28 Sep 2020 18:39:23 +0300 Subject: [PATCH 9/9] [SYCL] Additional enqueue for host-tasks Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 ++ .../source/detail/scheduler/graph_builder.cpp | 8 ++++-- sycl/source/detail/scheduler/scheduler.cpp | 26 +++++++++++++++++++ sycl/source/detail/scheduler/scheduler.hpp | 20 ++++++++++++++ sycl/test/host-interop-task/host-task.cpp | 24 ++++++++--------- 5 files changed, 66 insertions(+), 14 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d6dad42b68b34..8f093f07d6009 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -243,6 +243,8 @@ class DispatchHostTask { for (const DepDesc &Dep : Deps) Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement); + + Sched.enqueueHostTasksUnlocked(); } } }; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 2086b74d0f273..01be39a9729cf 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -108,10 +108,12 @@ static void unmarkVisitedNodes(std::vector &Visited) { Cmd->MMarks.MVisited = false; } -static void handleVisitedNodes(std::vector &Visited) { +void +Scheduler::GraphBuilder::handleVisitedNodes(std::vector &Visited) { for (Command *Cmd : Visited) { if (Cmd->MMarks.MToBeDeleted) { Cmd->getEvent()->setCommand(nullptr); + Scheduler::getInstance().removeHostTaskCommandUnlocked(Cmd); delete Cmd; } else Cmd->MMarks.MVisited = false; @@ -803,9 +805,11 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, NewCmd->addDep(e); } - if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) + if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) { NewCmd->MEmptyCmd = addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements, Queue, Command::BlockReason::HostTask); + Scheduler::getInstance().addHostTaskCommandUnlocked(NewCmd.get()); + } if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index c2a0c3fbbb509..3945879f439e9 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -286,6 +286,32 @@ MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { return Req->MSYCLMemObj->MRecord.get(); } +void Scheduler::addHostTaskCommandUnlocked(Command *Cmd) { + HostTaskCommandXRefT XRef = HostTaskCmds.insert(HostTaskCmds.end(), Cmd); + HostTaskCmdXRefs[Cmd] = XRef; +} + +void Scheduler::removeHostTaskCommandUnlocked(Command *Cmd) { + auto It = HostTaskCmdXRefs.find(Cmd); + + if (It == HostTaskCmdXRefs.end()) + return; + + HostTaskCommandXRefT &XRef = It->second; + HostTaskCmds.erase(XRef); + + HostTaskCmdXRefs.erase(It); +} + +void Scheduler::enqueueHostTasksUnlocked() { + for (Command *Cmd : HostTaskCmds) { + EnqueueResultT Res; + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + } +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index d6dec5e599f8c..5cb582070c0fa 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include /// \defgroup sycl_graph DPC++ Execution Graph @@ -585,6 +586,8 @@ class Scheduler { private: friend class ::MockScheduler; + static void handleVisitedNodes(std::vector &Visited); + /// Searches for suitable alloca in memory record. /// /// If none found, creates new one. @@ -743,6 +746,23 @@ class Scheduler { friend class stream_impl; + // List of host-task commands. This data structure is employed to overcome + // certain use-cases with deadlocks involving host-task. The use of this list + // is to enqueue (if possible) host-tasks when another host task is finished. + // List is used in order to remain the order of host-tasks unchanged. + // A map is employed to allow for quick lookup and removal of host-task + // command upon cleanup. + // Access to this data structure is guarded with graph read-write lock. + using HostTaskCommandsT = std::list; + using HostTaskCommandXRefT = HostTaskCommandsT::iterator; + HostTaskCommandsT HostTaskCmds; + std::unordered_map HostTaskCmdXRefs; + + void addHostTaskCommandUnlocked(Command *Cmd); + void removeHostTaskCommandUnlocked(Command *Cmd); + void enqueueHostTasksUnlocked(); + + // Protects stream buffers pool std::mutex StreamBuffersPoolMutex; std::map StreamBuffersPool; diff --git a/sycl/test/host-interop-task/host-task.cpp b/sycl/test/host-interop-task/host-task.cpp index dc05e98469fb0..0a3a30b6f7944 100644 --- a/sycl/test/host-interop-task/host-task.cpp +++ b/sycl/test/host-interop-task/host-task.cpp @@ -11,21 +11,21 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out 3 // RUN: %ACC_RUN_PLACEHOLDER %t.out 3 -// RUNx: %CPU_RUN_PLACEHOLDER %t.out 4 -// RUNx: %GPU_RUN_PLACEHOLDER %t.out 4 -// RUNx: %ACC_RUN_PLACEHOLDER %t.out 4 +// RUN: %CPU_RUN_PLACEHOLDER %t.out 4 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 4 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 4 -// RUNx: %CPU_RUN_PLACEHOLDER %t.out 5 -// RUNx: %GPU_RUN_PLACEHOLDER %t.out 5 -// RUNx: %ACC_RUN_PLACEHOLDER %t.out 5 +// RUN: %CPU_RUN_PLACEHOLDER %t.out 5 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 5 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 5 -// RUNx: %CPU_RUN_PLACEHOLDER %t.out 6 -// RUNx: %GPU_RUN_PLACEHOLDER %t.out 6 -// RUNx: %ACC_RUN_PLACEHOLDER %t.out 6 +// RUN: %CPU_RUN_PLACEHOLDER %t.out 6 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 6 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 6 -// RUNx: %CPU_RUN_PLACEHOLDER %t.out 7 -// RUNx: %GPU_RUN_PLACEHOLDER %t.out 7 -// RUNx: %ACC_RUN_PLACEHOLDER %t.out 7 +// RUN: %CPU_RUN_PLACEHOLDER %t.out 7 +// RUN: %GPU_RUN_PLACEHOLDER %t.out 7 +// RUN: %ACC_RUN_PLACEHOLDER %t.out 7 #include #include