diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index eab9f2fe43061..bd3bffac9b457 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1307,44 +1307,6 @@ pi_result _pi_context::getAvailableCommandList( pi_queue Queue, pi_command_list_ptr_t &CommandList, bool UseCopyEngine, bool AllowBatching, ze_command_queue_handle_t *ForcedCmdQueue) { - // This is a hack. TODO: Proper CommandList allocation per Executable Graph. - if( Queue->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) { - // TODO: Create new Command List. - if(Queue->LazyCommandListMap.empty()) { - const bool UseCopyEngine = false; - // Adding createCommandList() to LazyCommandListMap - ze_fence_handle_t ZeFence; - ZeStruct ZeFenceDesc; - ze_command_list_handle_t ZeCommandList; - - uint32_t QueueGroupOrdinal; - auto &QGroup = Queue->getQueueGroup(UseCopyEngine); - auto &ZeCommandQueue = - //ForcedCmdQueue ? *ForcedCmdQueue : - QGroup.getZeQueue(&QueueGroupOrdinal); - //if (ForcedCmdQueue) - // QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue); - - ZeStruct ZeCommandListDesc; - ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; - - ZE_CALL(zeCommandListCreate, (Queue->Context->ZeContext, Queue->Device->ZeDevice, - &ZeCommandListDesc, &ZeCommandList)); - - ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence)); - std::tie(CommandList, std::ignore) = Queue->LazyCommandListMap.insert( - std::pair( - ZeCommandList, {ZeFence, false, ZeCommandQueue, QueueGroupOrdinal})); - - Queue->insertActiveBarriers(CommandList, UseCopyEngine); - // - CommandList->second.ZeFenceInUse = true; - } else { - CommandList = Queue->LazyCommandListMap.begin(); - } - return PI_SUCCESS; - } - // Immediate commandlists have been pre-allocated and are always available. if (Queue->Device->useImmediateCommandLists()) { CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList(); @@ -1583,11 +1545,6 @@ void _pi_queue::CaptureIndirectAccesses() { pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, bool IsBlocking, bool OKToBatchCommand) { - // When executing a Graph, defer execution if this is a command - // which could be batched (i.e. likely a kernel submission) - if (this->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION && OKToBatchCommand) - return PI_SUCCESS; - bool UseCopyEngine = CommandList->second.isCopy(this); // If the current LastCommandEvent is the nullptr, then it means @@ -3828,14 +3785,7 @@ pi_result piQueueFinish(pi_queue Queue) { // Flushing cross-queue dependencies is covered by createAndRetainPiZeEventList, // so this can be left as a no-op. pi_result piQueueFlush(pi_queue Queue) { - if( Queue->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) { - - pi_command_list_ptr_t CommandList{}; - // TODO: - CommandList = Queue->LazyCommandListMap.begin(); - - Queue->executeCommandList(CommandList, false, false); - } + (void)Queue; return PI_SUCCESS; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 25fda46b65d89..f6b31229f0535 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -937,8 +937,6 @@ struct _pi_queue : _pi_object { // Map of all command lists used in this queue. pi_command_list_map_t CommandListMap; - // TODO: Assign Graph related command lists to command_graph object - pi_command_list_map_t LazyCommandListMap; // Helper data structure to hold all variables related to batching typedef struct CommandBatch { diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index d478d2e302026..b79507558be95 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -33,10 +33,7 @@ void graph_impl::exec(sycl::detail::queue_ptr q) { } void graph_impl::exec_and_wait(sycl::detail::queue_ptr q) { - if (MFirst) { - exec(q); - MFirst = false; - } + exec(q); q->wait(); } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index cb4f5cb2d6e81..6a385648844e0 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -81,8 +81,6 @@ struct node_impl { struct graph_impl { std::set MRoots; std::list MSchedule; - // TODO: Change one time initialization to per executable object - bool MFirst; graph_ptr MParent; @@ -95,7 +93,7 @@ struct graph_impl { template node_ptr add(graph_ptr impl, T cgf, const std::vector &dep = {}); - graph_impl() : MFirst(true) {} + graph_impl() {} }; } // namespace detail diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index e6caedcc66bcc..f55b845b1c84e 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -278,12 +278,6 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId); #endif - if (has_property()) { - const detail::plugin &Plugin = getPlugin(); - if (Plugin.getBackend() == backend::ext_oneapi_level_zero) - Plugin.call(getHandleRef()); - } - std::vector> WeakEvents; std::vector SharedEvents; { diff --git a/sycl/test/graph/graph-explicit-dotp.cpp b/sycl/test/graph/graph-explicit-dotp.cpp index 561057bc13f84..11af99f10d45c 100644 --- a/sycl/test/graph/graph-explicit-dotp.cpp +++ b/sycl/test/graph/graph-explicit-dotp.cpp @@ -83,16 +83,15 @@ int main() { // Using shortcut for executing a graph of commands q.exec_graph(executable_graph).wait(); - if (*dotp != host_gold_result()) { - std::cout << "Error unexpected result!\n"; - } + if (*dotp == host_gold_result()) + std::cout << "Dot product explicit graph test passed." << std::endl; + else + std::cout << "Dot product explicit graph test failed." << std::endl; sycl::free(dotp, q); sycl::free(x, q); sycl::free(y, q); sycl::free(z, q); - std::cout << "done.\n"; - return 0; } diff --git a/sycl/test/graph/graph-explicit-queue-shortcuts.cpp b/sycl/test/graph/graph-explicit-queue-shortcuts.cpp index 19d74ae895e86..f40516b831a61 100644 --- a/sycl/test/graph/graph-explicit-queue-shortcuts.cpp +++ b/sycl/test/graph/graph-explicit-queue-shortcuts.cpp @@ -31,9 +31,18 @@ int main() { auto e3 = q.exec_graph(executable_graph, e1); q.exec_graph(executable_graph, {e2, e3}).wait(); - sycl::free(arr, q); + bool check = true; + for (int i = 0; i < n; i++) { + if (arr[i] != 1) + check = false; + } + + if (check) + std::cout << "Queue shortcuts explicit graph test passed." << std::endl; + else + std::cout << "Queue shortcuts explicit graph test failed." << std::endl; - std::cout << "done " << arr[0] << std::endl; + sycl::free(arr, q); return 0; } diff --git a/sycl/test/graph/graph-explicit-reduction.cpp b/sycl/test/graph/graph-explicit-reduction.cpp index 9a2788079570c..686d7f4374523 100644 --- a/sycl/test/graph/graph-explicit-reduction.cpp +++ b/sycl/test/graph/graph-explicit-reduction.cpp @@ -28,10 +28,13 @@ int main() { e.wait(); + if (*output == 45) + std::cout << "Reduction explicit graph test passed." << std::endl; + else + std::cout << "Reduction explicit graph test failed." << std::endl; + sycl::free(input, q); sycl::free(output, q); - std::cout << "done\n"; - return 0; } diff --git a/sycl/test/graph/graph-explicit-repeated-exec.cpp b/sycl/test/graph/graph-explicit-repeated-exec.cpp new file mode 100644 index 0000000000000..969aa168f2b34 --- /dev/null +++ b/sycl/test/graph/graph-explicit-repeated-exec.cpp @@ -0,0 +1,67 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +#include +#include + +#include + +int main() { + + sycl::property_list properties{ + sycl::property::queue::in_order{}, + sycl::ext::oneapi::property::queue::lazy_execution{}}; + + sycl::queue q{sycl::gpu_selector_v, properties}; + + sycl::ext::oneapi::experimental::command_graph g; + + const size_t n = 10; + float *arr = sycl::malloc_shared(n, q); + for (int i = 0; i < n; i++) { + arr[i] = 0; + } + + g.add([&](sycl::handler &h) { + h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx) { + size_t i = idx; + arr[i] += 1; + }); + }); + + bool check = true; + for (int i = 0; i < n; i++) { + if (arr[i] != 0) + check = false; + } + + auto executable_graph = g.finalize(q.get_context()); + + for (int i = 0; i < n; i++) { + if (arr[i] != 0) + check = false; + } + + q.submit([&](sycl::handler &h) { h.exec_graph(executable_graph); }); + + for (int i = 0; i < n; i++) { + if (arr[i] != 1) + check = false; + } + + q.submit([&](sycl::handler &h) { h.exec_graph(executable_graph); }); + + for (int i = 0; i < n; i++) { + if (arr[i] != 2) + check = false; + } + + if (check) + std::cout << "Repeated execution of an explicit graph test passed." + << std::endl; + else + std::cout << "Repeated execution of an explicit graph test failed." + << std::endl; + + sycl::free(arr, q); + + return 0; +} diff --git a/sycl/test/graph/graph-explicit-simple.cpp b/sycl/test/graph/graph-explicit-simple.cpp index 1e7bf1fec9afb..f782242ee38a9 100644 --- a/sycl/test/graph/graph-explicit-simple.cpp +++ b/sycl/test/graph/graph-explicit-simple.cpp @@ -16,6 +16,9 @@ int main() { const size_t n = 10; float *arr = sycl::malloc_shared(n, q); + for (int i = 0; i < n; i++) { + arr[i] = 0; + } g.add([&](sycl::handler &h) { h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx) { @@ -24,19 +27,32 @@ int main() { }); }); - auto result_before_exec1 = arr[0]; + bool check = true; + for (int i = 0; i < n; i++) { + if (arr[i] != 0) + check = false; + } auto executable_graph = g.finalize(q.get_context()); - auto result_before_exec2 = arr[0]; + for (int i = 0; i < n; i++) { + if (arr[i] != 0) + check = false; + } q.submit([&](sycl::handler &h) { h.exec_graph(executable_graph); }); - auto result = arr[0]; + for (int i = 0; i < n; i++) { + if (arr[i] != 1) + check = false; + } - sycl::free(arr, q); + if (check) + std::cout << "Simple explicit graph test passed." << std::endl; + else + std::cout << "Simple explicit graph test failed." << std::endl; - std::cout << "done.\n"; + sycl::free(arr, q); return 0; }