diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 1a6937e47498d..eab9f2fe43061 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1583,9 +1583,11 @@ void _pi_queue::CaptureIndirectAccesses() { pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, bool IsBlocking, bool OKToBatchCommand) { - // When executing a Graph, defer execution - if( this->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) return PI_SUCCESS; - + // When executing a Graph, defer execution if this is a command + // which could be batched (i.e. likely a kernel submission) + if (this->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION && OKToBatchCommand) + return PI_SUCCESS; + bool UseCopyEngine = CommandList->second.isCopy(this); // If the current LastCommandEvent is the nullptr, then it means @@ -3827,31 +3829,13 @@ pi_result piQueueFinish(pi_queue Queue) { // so this can be left as a no-op. pi_result piQueueFlush(pi_queue Queue) { if( Queue->Properties & PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION ) { + pi_command_list_ptr_t CommandList{}; // TODO: CommandList = Queue->LazyCommandListMap.begin(); - - auto &ZeCommandQueue = CommandList->second.ZeQueue; - // Scope of the lock must be till the end of the function, otherwise new mem - // allocs can be created between the moment when we made a snapshot and the - // moment when command list is closed and executed. But mutex is locked only - // if indirect access tracking enabled, because std::defer_lock is used. - // unique_lock destructor at the end of the function will unlock the mutex - // if it was locked (which happens only if IndirectAccessTrackingEnabled is - // true). - std::unique_lock ContextsLock( - Queue->Device->Platform->ContextsMutex, std::defer_lock); - - // Close the command list and have it ready for dispatch. - ZE_CALL(zeCommandListClose, (CommandList->first)); - - // Offload command list to the GPU for asynchronous execution - auto ZeCommandList = CommandList->first; - auto ZeResult = ZE_CALL_NOCHECK( - zeCommandQueueExecuteCommandLists, - (ZeCommandQueue, 1, &ZeCommandList, CommandList->second.ZeFence)); + + Queue->executeCommandList(CommandList, false, false); } - (void)Queue; return PI_SUCCESS; }