From 1ef4cd682f3a6f0ad8e5a6cf8958db30454eb2ba Mon Sep 17 00:00:00 2001 From: Ruyman Reyes Date: Wed, 20 May 2020 17:43:57 +0000 Subject: [PATCH 1/2] [SYCL][CUDA] Remove pi Event Callback implementation Since introduction of host tasks in #1471, `piEventCallback` and related functionality is not required by the SYCL-RT. Removing the implementation of this behaviour from the CUDA backend simplifies the submission of operations to streams and overall increases performance. Signed-off-by: Ruyman Reyes --- sycl/plugins/cuda/pi_cuda.cpp | 154 +++++++--------------------------- sycl/plugins/cuda/pi_cuda.hpp | 101 ++-------------------- 2 files changed, 36 insertions(+), 219 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 8a4d9540334a4..d48d8ed6501b9 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -276,13 +276,15 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue) isStarted_{false}, evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr}, queue_{queue}, context_{context} { - if (is_native_event()) { + if (type != PI_COMMAND_TYPE_USER) { PI_CHECK_ERROR(cuEventCreate(&evEnd_, CU_EVENT_DEFAULT)); if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { PI_CHECK_ERROR(cuEventCreate(&evQueued_, CU_EVENT_DEFAULT)); PI_CHECK_ERROR(cuEventCreate(&evStart_, CU_EVENT_DEFAULT)); } + } else { + cl::sycl::detail::pi::die("User-defined events not implemented"); } if (queue_ != nullptr) { @@ -303,7 +305,7 @@ pi_result _pi_event::start() { pi_result result; try { - if (is_native_event() && queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { + if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0)); result = PI_CHECK_ERROR(cuEventRecord(evStart_, queue_->get())); @@ -313,8 +315,6 @@ pi_result _pi_event::start() { } isStarted_ = true; - // let observers know that the event is "submitted" - trigger_callback(get_execution_status()); return result; } @@ -351,37 +351,16 @@ pi_result _pi_event::record() { pi_result result = PI_INVALID_OPERATION; - if (is_native_event()) { - - if (!queue_) { - return PI_INVALID_QUEUE; - } + if (!queue_) { + return PI_INVALID_QUEUE; + } - CUstream cuStream = queue_->get(); + CUstream cuStream = queue_->get(); - try { - result = PI_CHECK_ERROR(cuEventRecord(evEnd_, cuStream)); - - result = cuda_piEventRetain(this); - try { - result = PI_CHECK_ERROR(cuLaunchHostFunc( - cuStream, - [](void *userData) { - pi_event event = reinterpret_cast(userData); - event->set_event_complete(); - cuda_piEventRelease(event); - }, - this)); - } catch (...) { - // If host function fails to enqueue we must release the event here - result = cuda_piEventRelease(this); - throw; - } - } catch (pi_result error) { - result = error; - } - } else { - result = PI_SUCCESS; + try { + result = PI_CHECK_ERROR(cuEventRecord(evEnd_, cuStream)); + } catch (pi_result error) { + result = error; } if (result == PI_SUCCESS) { @@ -392,65 +371,23 @@ pi_result _pi_event::record() { } pi_result _pi_event::wait() { - pi_result retErr; - if (is_native_event()) { - try { - retErr = PI_CHECK_ERROR(cuEventSynchronize(evEnd_)); - isCompleted_ = true; - } catch (pi_result error) { - retErr = error; - } - } else { - - while (!is_completed()) { - // wait for user event to complete - } - retErr = PI_SUCCESS; + try { + retErr = PI_CHECK_ERROR(cuEventSynchronize(evEnd_)); + isCompleted_ = true; + } catch (pi_result error) { + retErr = error; } - auto is_success = retErr == PI_SUCCESS; - auto status = is_success ? get_execution_status() : pi_int32(retErr); - - trigger_callback(status); - return retErr; } // makes all future work submitted to queue wait for all work captured in event. pi_result enqueueEventWait(pi_queue queue, pi_event event) { - if (event->is_native_event()) { - - // for native events, the cuStreamWaitEvent call is used. - // This makes all future work submitted to stream wait for all - // work captured in event. - - return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0)); - - } else { - - // for user events, we enqueue a callback. When invoked, the - // callback will block until the user event is marked as - // completed. - - static auto user_wait_func = [](void *user_data) { - // The host function must not make any CUDA API calls. - auto event = static_cast(user_data); - - // busy wait for user event to complete - event->wait(); - - // this function does not need the event to be kept alive - // anymore - cuda_piEventRelease(event); - }; - - // retain event to ensure it is still alive when the - // user_wait_func callback is invoked - cuda_piEventRetain(event); - - return PI_CHECK_ERROR(cuLaunchHostFunc(queue->get(), user_wait_func, event)); - } + // for native events, the cuStreamWaitEvent call is used. + // This makes all future work submitted to stream wait for all + // work captured in event. + return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0)); } _pi_program::_pi_program(pi_context ctxt) @@ -2763,37 +2700,13 @@ pi_result cuda_piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, pfn_notify notify, void *user_data) { - assert(event); - assert(notify); - assert(command_exec_callback_type == PI_EVENT_SUBMITTED || - command_exec_callback_type == PI_EVENT_RUNNING || - command_exec_callback_type == PI_EVENT_COMPLETE); - event_callback callback(pi_event_status(command_exec_callback_type), notify, - user_data); - - event->set_event_callback(callback); - + cl::sycl::detail::pi::die("Event Callback not implemented"); return PI_SUCCESS; } pi_result cuda_piEventSetStatus(pi_event event, pi_int32 execution_status) { - assert(execution_status >= PI_EVENT_COMPLETE && - execution_status <= PI_EVENT_QUEUED); - - if (!event || event->is_native_event()) { - return PI_INVALID_EVENT; - } - - if (execution_status == PI_EVENT_COMPLETE) { - return event->set_event_complete(); - } else if (execution_status < 0) { - // TODO: A negative integer value causes all enqueued commands that wait - // on this user event to be terminated. - cl::sycl::detail::pi::die("cuda_piEventSetStatus support for negative execution_status not " - "implemented."); - } - + cl::sycl::detail::pi::die("Event Set Status not implemented"); return PI_INVALID_VALUE; } @@ -2821,19 +2734,13 @@ pi_result cuda_piEventRelease(pi_event event) { if (event->decrement_reference_count() == 0) { std::unique_ptr<_pi_event> event_ptr{event}; pi_result result = PI_INVALID_EVENT; - - if (event->is_native_event()) { - try { - ScopedContext active(event->get_context()); - auto cuEvent = event->get(); - result = PI_CHECK_ERROR(cuEventDestroy(cuEvent)); - } catch (...) { - result = PI_OUT_OF_RESOURCES; - } - } else { - result = PI_SUCCESS; + try { + ScopedContext active(event->get_context()); + auto cuEvent = event->get(); + result = PI_CHECK_ERROR(cuEventDestroy(cuEvent)); + } catch (...) { + result = PI_OUT_OF_RESOURCES; } - return result; } @@ -2888,9 +2795,6 @@ pi_result cuda_piEnqueueEventsWait(pi_queue command_queue, /// \return PI_SUCCESS on success. PI_INVALID_EVENT if given a user event. pi_result cuda_piextEventGetNativeHandle(pi_event event, pi_native_handle *nativeHandle) { - if (event->is_user_event()) { - return PI_INVALID_EVENT; - } *nativeHandle = reinterpret_cast(event->get()); return PI_SUCCESS; } diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 382ef5ed85c3e..fa05dd2e174d0 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -302,37 +302,6 @@ struct _pi_queue { typedef void (*pfn_notify)(pi_event event, pi_int32 eventCommandStatus, void *userData); - -class event_callback { -public: - void trigger_callback(pi_event event, pi_int32 currentEventStatus) const { - - auto validParameters = callback_ && event; - - // As a pi_event_status value approaches 0, it gets closer to completion. - // If the calling pi_event's status is less than or equal to the event - // status the user is interested in, invoke the callback anyway. The event - // will have passed through that state anyway. - auto validStatus = currentEventStatus <= observedEventStatus_; - - if (validParameters && validStatus) { - - callback_(event, currentEventStatus, userData_); - } - } - - event_callback(pi_event_status status, pfn_notify callback, void *userData) - : observedEventStatus_{status}, callback_{callback}, userData_{userData} { - } - - pi_event_status get_status() const noexcept { return observedEventStatus_; } - -private: - pi_event_status observedEventStatus_; - pfn_notify callback_; - void *userData_; -}; - /// PI Event mapping to CUevent /// class _pi_event { @@ -347,41 +316,6 @@ class _pi_event { native_type get() const noexcept { return evEnd_; }; - pi_result set_event_complete() noexcept { - - if (isCompleted_) { - return PI_INVALID_OPERATION; - } - - isRecorded_ = true; - isCompleted_ = true; - - trigger_callback(get_execution_status()); - - return PI_SUCCESS; - } - - void trigger_callback(pi_int32 status) { - - std::vector callbacks; - - // Here we move all callbacks into local variable before we call them. - // This is a defensive maneuver; if any of the callbacks attempt to - // add additional callbacks, we will end up in a bad spot. Our mutex - // will be locked twice and the vector will be modified as it is being - // iterated over! By moving everything locally, we can call all of these - // callbacks and let them modify the original vector without much worry. - - { - std::lock_guard lock(mutex_); - event_callbacks_.swap(callbacks); - } - - for (auto &event_callback : callbacks) { - event_callback.trigger_callback(this, status); - } - } - pi_queue get_queue() const noexcept { return queue_; } pi_command_type get_command_type() const noexcept { return commandType_; } @@ -390,10 +324,10 @@ class _pi_event { bool is_recorded() const noexcept { return isRecorded_; } - bool is_completed() const noexcept { return isCompleted_; } - bool is_started() const noexcept { return isStarted_; } + bool is_completed() const noexcept { return isCompleted_; }; + pi_int32 get_execution_status() const noexcept { if (!is_recorded()) { @@ -406,24 +340,8 @@ class _pi_event { return PI_EVENT_COMPLETE; } - void set_event_callback(const event_callback &callback) { - auto current_status = get_execution_status(); - if (current_status <= callback.get_status()) { - callback.trigger_callback(this, current_status); - } else { - std::lock_guard lock(mutex_); - event_callbacks_.emplace_back(callback); - } - } - pi_context get_context() const noexcept { return context_; }; - bool is_user_event() const noexcept { - return get_command_type() == PI_COMMAND_TYPE_USER; - } - - bool is_native_event() const noexcept { return !is_user_event(); } - pi_uint32 increment_reference_count() { return ++refCount_; } pi_uint32 decrement_reference_count() { return --refCount_; } @@ -462,13 +380,14 @@ class _pi_event { std::atomic_uint32_t refCount_; // Event reference count. - std::atomic_bool isCompleted_; // Atomic bool used by user events. Can be - // used to wait for a user event's completion. + bool isCompleted_; // Signifies whether the operations have completed + // bool isRecorded_; // Signifies wether a native CUDA event has been recorded // yet. - bool isStarted_; // Signifies wether the operation associated with the - // PI event has started or not + bool isStarted_; // Signifies wether the operation associated with the + // PI event has started or not + // native_type evEnd_; // CUDA event handle. If this _pi_event represents a user // event, this will be nullptr. @@ -484,12 +403,6 @@ class _pi_event { pi_context context_; // pi_context associated with the event. If this is a // native event, this will be the same context associated // with the queue_ member. - - std::mutex mutex_; // Protect access to event_callbacks_. TODO: There might be - // a lock-free data structure we can use here. - std::vector - event_callbacks_; // Callbacks that can be triggered when an event's state - // changes. }; /// Implementation of PI Program on CUDA Module object From adcf8978b525aef45a5ffc888a42722017987c45 Mon Sep 17 00:00:00 2001 From: Ruyman Reyes Date: Thu, 21 May 2020 17:40:49 +0000 Subject: [PATCH 2/2] Addressing feedback from review --- sycl/plugins/cuda/pi_cuda.cpp | 37 +- sycl/plugins/cuda/pi_cuda.hpp | 6 - sycl/unittests/pi/CMakeLists.txt | 1 - sycl/unittests/pi/EventTest.cpp | 348 ------------------ sycl/unittests/pi/cuda/CMakeLists.txt | 1 - sycl/unittests/pi/cuda/test_events.cpp | 105 ------ .../pi/cuda/test_primary_context.cpp | 2 +- 7 files changed, 10 insertions(+), 490 deletions(-) delete mode 100644 sycl/unittests/pi/EventTest.cpp delete mode 100644 sycl/unittests/pi/cuda/test_events.cpp diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 9afd7610f3ae2..c514d4ad87896 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -276,15 +276,13 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue) isStarted_{false}, evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr}, queue_{queue}, context_{context} { - if (type != PI_COMMAND_TYPE_USER) { - PI_CHECK_ERROR(cuEventCreate(&evEnd_, CU_EVENT_DEFAULT)); + assert(type != PI_COMMAND_TYPE_USER); - if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { - PI_CHECK_ERROR(cuEventCreate(&evQueued_, CU_EVENT_DEFAULT)); - PI_CHECK_ERROR(cuEventCreate(&evStart_, CU_EVENT_DEFAULT)); - } - } else { - cl::sycl::detail::pi::die("User-defined events not implemented"); + PI_CHECK_ERROR(cuEventCreate(&evEnd_, CU_EVENT_DEFAULT)); + + if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { + PI_CHECK_ERROR(cuEventCreate(&evQueued_, CU_EVENT_DEFAULT)); + PI_CHECK_ERROR(cuEventCreate(&evStart_, CU_EVENT_DEFAULT)); } if (queue_ != nullptr) { @@ -2622,24 +2620,7 @@ pi_result cuda_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, // Events // pi_result cuda_piEventCreate(pi_context context, pi_event *event) { - assert(context != nullptr); - assert(event != nullptr); - pi_result retErr = PI_SUCCESS; - pi_event retEvent = nullptr; - - try { - retEvent = _pi_event::make_user(context); - if (retEvent == nullptr) { - retErr = PI_OUT_OF_HOST_MEMORY; - } - } catch (pi_result err) { - retErr = err; - } catch (...) { - retErr = PI_OUT_OF_RESOURCES; - } - - *event = retEvent; - return retErr; + cl::sycl::detail::pi::die("PI Event Create not implemented in CUDA backend"); } pi_result cuda_piEventGetInfo(pi_event event, pi_event_info param_name, @@ -2703,13 +2684,13 @@ pi_result cuda_piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, pfn_notify notify, void *user_data) { - cl::sycl::detail::pi::die("Event Callback not implemented"); + cl::sycl::detail::pi::die("Event Callback not implemented in CUDA backend"); return PI_SUCCESS; } pi_result cuda_piEventSetStatus(pi_event event, pi_int32 execution_status) { - cl::sycl::detail::pi::die("Event Set Status not implemented"); + cl::sycl::detail::pi::die("Event Set Status not implemented in CUDA backend"); return PI_INVALID_VALUE; } diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index fa05dd2e174d0..03220fac1ac47 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -358,12 +358,6 @@ class _pi_event { // pi_uint64 get_end_time() const; - // make a user event. CUDA has no concept of user events, so this - // functionality is implemented by the CUDA PI implementation. - static pi_event make_user(pi_context context) { - return new _pi_event(PI_COMMAND_TYPE_USER, context, nullptr); - } - // construct a native CUDA. This maps closely to the underlying CUDA event. static pi_event make_native(pi_command_type type, pi_queue queue) { return new _pi_event(type, queue->get_context(), queue); diff --git a/sycl/unittests/pi/CMakeLists.txt b/sycl/unittests/pi/CMakeLists.txt index fbaf61fea440f..d1b14617c5c56 100644 --- a/sycl/unittests/pi/CMakeLists.txt +++ b/sycl/unittests/pi/CMakeLists.txt @@ -6,7 +6,6 @@ add_sycl_unittest(PiTests OBJECT EnqueueMemTest.cpp PiMock.cpp PlatformTest.cpp - EventTest.cpp ) add_dependencies(PiTests sycl) diff --git a/sycl/unittests/pi/EventTest.cpp b/sycl/unittests/pi/EventTest.cpp deleted file mode 100644 index 08b4fdcc95aa7..0000000000000 --- a/sycl/unittests/pi/EventTest.cpp +++ /dev/null @@ -1,348 +0,0 @@ -//==---- EventTest.cpp --- PI unit tests --------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "CL/sycl/detail/pi.hpp" -#include "TestGetPlugin.hpp" -#include -#include -#include -#include - -using namespace cl::sycl; - -namespace pi { -class EventTest : public testing::TestWithParam { -protected: - pi_platform _platform; - pi_context _context; - pi_queue _queue; - pi_device _device; - pi_result _result; - - EventTest() - : _context{nullptr}, _queue{nullptr}, _device{nullptr}, - _result{PI_INVALID_VALUE} {} - - ~EventTest() override = default; - - void SetUp() override { - pi_uint32 numPlatforms = 0; - - detail::plugin plugin = GetParam(); - - RecordProperty("PiBackend", GetBackendString(plugin.getBackend())); - - ASSERT_EQ((plugin.call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS); - - ASSERT_EQ((plugin.call_nocheck( - numPlatforms, &_platform, nullptr)), - PI_SUCCESS); - (void)numPlatforms; // Deal with unused variable warning - - ASSERT_EQ((plugin.call_nocheck( - _platform, PI_DEVICE_TYPE_DEFAULT, 1, &_device, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin.call_nocheck( - nullptr, 1, &_device, nullptr, nullptr, &_context)), - PI_SUCCESS); - - ASSERT_EQ((plugin.call_nocheck( - _context, _device, 0, &_queue)), - PI_SUCCESS); - - _result = PI_INVALID_VALUE; - } - - void TearDown() override { - - detail::plugin plugin = GetParam(); - - ASSERT_EQ((plugin.call_nocheck(_queue)), - PI_SUCCESS); - - ASSERT_EQ( - (plugin.call_nocheck(_context)), - PI_SUCCESS); - } -}; - -static std::vector Plugins = pi::initializeAndRemoveInvalid(); - -INSTANTIATE_TEST_CASE_P( - EventTestImpl, EventTest, testing::ValuesIn(Plugins), - [](const testing::TestParamInfo &info) { - return pi::GetBackendString(info.param.getBackend()); - }); - -// TODO: need more negative tests to show errors being reported when expected -// (invalid arguments etc). - -TEST_P(EventTest, PICreateEvent) { - pi_event foo; - - detail::plugin plugin = GetParam(); - - ASSERT_EQ( - (plugin.call_nocheck(_context, &foo)), - PI_SUCCESS); - ASSERT_NE(foo, nullptr); - - EXPECT_EQ((plugin.call_nocheck(foo)), - PI_SUCCESS); -} - -constexpr size_t event_type_count = 3; -static bool triggered_flag[event_type_count] = {false, false, false}; - -struct callback_user_data { - pi_int32 event_type; - int index; -}; - -void EventCallback(pi_event event, pi_int32 status, void *data) { - ASSERT_NE(data, nullptr); - - callback_user_data *pdata = static_cast(data); - -#ifndef NDEBUG - printf("\tEvent callback %d of type %d triggered\n", pdata->index, - pdata->event_type); -#endif - - triggered_flag[pdata->index] = true; -} - -TEST_P(EventTest, piEventSetCallback) { - - detail::plugin plugin = GetParam(); - - pi_int32 event_callback_types[event_type_count] = { - PI_EVENT_SUBMITTED, PI_EVENT_RUNNING, PI_EVENT_COMPLETE}; - - callback_user_data user_data[event_type_count]; - - // gate event lets us register callbacks before letting the enqueued work be - // executed. - pi_event gateEvent; - ASSERT_EQ((plugin.call_nocheck(_context, - &gateEvent)), - PI_SUCCESS); - - constexpr const size_t dataCount = 1000u; - std::vector data(dataCount); - auto size_in_bytes = data.size() * sizeof(int); - - pi_mem memObj; - ASSERT_EQ( - (plugin.call_nocheck( - _context, PI_MEM_FLAGS_ACCESS_RW, size_in_bytes, nullptr, &memObj)), - PI_SUCCESS); - - pi_event syncEvent; - ASSERT_EQ((plugin.call_nocheck( - _queue, memObj, false, 0, size_in_bytes, data.data(), 1, - &gateEvent, &syncEvent)), - PI_SUCCESS); - - for (size_t i = 0; i < event_type_count; i++) { - user_data[i].event_type = event_callback_types[i]; - user_data[i].index = i; - ASSERT_EQ( - (plugin.call_nocheck( - syncEvent, event_callback_types[i], EventCallback, user_data + i)), - PI_SUCCESS); - } - - ASSERT_EQ((plugin.call_nocheck( - gateEvent, PI_EVENT_COMPLETE)), - PI_SUCCESS); - ASSERT_EQ( - (plugin.call_nocheck(1, &syncEvent)), - PI_SUCCESS); - ASSERT_EQ((plugin.call_nocheck(_queue)), - PI_SUCCESS); - - for (size_t k = 0; k < event_type_count; ++k) { - EXPECT_TRUE(triggered_flag[k]); - } - - ASSERT_EQ((plugin.call_nocheck(gateEvent)), - PI_SUCCESS); - ASSERT_EQ((plugin.call_nocheck(syncEvent)), - PI_SUCCESS); -} - -TEST_P(EventTest, piEventGetInfo) { - - detail::plugin plugin = GetParam(); - - pi_event foo; - ASSERT_EQ( - (plugin.call_nocheck(_context, &foo)), - PI_SUCCESS); - ASSERT_NE(foo, nullptr); - - pi_uint64 paramValue = 0; - pi_uint64 retSize = 0; - EXPECT_EQ((plugin.call_nocheck( - foo, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(paramValue), - ¶mValue, &retSize)), - PI_SUCCESS); - - EXPECT_EQ(retSize, sizeof(pi_int32)); - EXPECT_EQ(paramValue, PI_EVENT_SUBMITTED); - - EXPECT_EQ((plugin.call_nocheck(foo)), - PI_SUCCESS); -} - -TEST_P(EventTest, piEventSetStatus) { - - detail::plugin plugin = GetParam(); - - pi_event foo; - ASSERT_EQ( - (plugin.call_nocheck(_context, &foo)), - PI_SUCCESS); - ASSERT_NE(foo, nullptr); - - pi_event_status paramValue = PI_EVENT_QUEUED; - size_t retSize = 0u; - plugin.call_nocheck( - foo, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(paramValue), - ¶mValue, &retSize); - - EXPECT_EQ((plugin.call_nocheck( - foo, PI_EVENT_COMPLETE)), - PI_SUCCESS); - - paramValue = {}; - retSize = 0u; - ASSERT_EQ((plugin.call_nocheck( - foo, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(paramValue), - ¶mValue, &retSize)), - PI_SUCCESS); - ASSERT_EQ(paramValue, PI_EVENT_COMPLETE); - - EXPECT_EQ((plugin.call_nocheck(foo)), - PI_SUCCESS); -} - -TEST_P(EventTest, WaitForManualEventOnOtherThread) { - - detail::plugin plugin = GetParam(); - - pi_event foo; - ASSERT_EQ( - (plugin.call_nocheck(_context, &foo)), - PI_SUCCESS); - ASSERT_NE(foo, nullptr); - - pi_event_status paramValue = {}; - size_t retSize = 0u; - ASSERT_EQ((plugin.call_nocheck( - foo, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(paramValue), - ¶mValue, &retSize)), - PI_SUCCESS); - ASSERT_EQ(paramValue, PI_EVENT_SUBMITTED); - - std::atomic started{false}; - - auto tWaiter = std::thread([&]() { - started = true; - ASSERT_EQ((plugin.call_nocheck(1, &foo)), - PI_SUCCESS); - }); - - while (!started) { - }; - - ASSERT_EQ((plugin.call_nocheck( - foo, PI_EVENT_COMPLETE)), - PI_SUCCESS); - - tWaiter.join(); - - paramValue = {}; - retSize = 0u; - ASSERT_EQ((plugin.call_nocheck( - foo, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(paramValue), - ¶mValue, &retSize)), - PI_SUCCESS); - ASSERT_EQ(paramValue, PI_EVENT_COMPLETE); - - ASSERT_EQ((plugin.call_nocheck(foo)), - PI_SUCCESS); -} - -TEST_P(EventTest, piEnqueueEventsWait) { - - detail::plugin plugin = GetParam(); - - constexpr const size_t dataCount = 10u; - int output[dataCount] = {}; - const int data[dataCount] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - constexpr const size_t bytes = sizeof(data); - - pi_mem memObj; - ASSERT_EQ((plugin.call_nocheck( - _context, PI_MEM_FLAGS_ACCESS_RW, bytes, nullptr, &memObj)), - PI_SUCCESS); - - pi_event events[4] = {nullptr, nullptr, nullptr, nullptr}; - - ASSERT_EQ((plugin.call_nocheck( - _queue, memObj, true, 0, bytes, data, 0, nullptr, &events[0])), - PI_SUCCESS); - ASSERT_NE(events[0], nullptr); - - ASSERT_EQ( - (plugin.call_nocheck( - _queue, memObj, true, 0, bytes, output, 0, nullptr, &events[1])), - PI_SUCCESS); - ASSERT_NE(events[1], nullptr); - - ASSERT_EQ((plugin.call_nocheck(_context, - &events[2])), - PI_SUCCESS); - ASSERT_NE(events[2], nullptr); - - ASSERT_EQ((plugin.call_nocheck( - _queue, 3, events, &events[3])), - PI_SUCCESS); - ASSERT_NE(events[3], nullptr); - - pi_event_status paramValue = {}; - size_t retSize = 0u; - ASSERT_EQ((plugin.call_nocheck( - events[3], PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, - sizeof(paramValue), ¶mValue, &retSize)), - PI_SUCCESS); - ASSERT_NE(paramValue, PI_EVENT_COMPLETE); - - ASSERT_EQ((plugin.call_nocheck( - events[2], PI_EVENT_COMPLETE)), - PI_SUCCESS); - - ASSERT_EQ( - (plugin.call_nocheck(1, &events[3])), - PI_SUCCESS); - - paramValue = {}; - retSize = 0u; - ASSERT_EQ((plugin.call_nocheck( - events[3], PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, - sizeof(paramValue), ¶mValue, &retSize)), - PI_SUCCESS); - ASSERT_EQ(paramValue, PI_EVENT_COMPLETE); -} - -} // namespace pi diff --git a/sycl/unittests/pi/cuda/CMakeLists.txt b/sycl/unittests/pi/cuda/CMakeLists.txt index 8c1efe21467a3..13ce941adefff 100644 --- a/sycl/unittests/pi/cuda/CMakeLists.txt +++ b/sycl/unittests/pi/cuda/CMakeLists.txt @@ -8,7 +8,6 @@ add_sycl_unittest(PiCudaTests OBJECT test_mem_obj.cpp test_primary_context.cpp test_queue.cpp - test_events.cpp ) add_dependencies(PiCudaTests sycl) diff --git a/sycl/unittests/pi/cuda/test_events.cpp b/sycl/unittests/pi/cuda/test_events.cpp deleted file mode 100644 index 6853b04c63305..0000000000000 --- a/sycl/unittests/pi/cuda/test_events.cpp +++ /dev/null @@ -1,105 +0,0 @@ -//==---- test_events.cpp --- PI unit tests ---------------------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" - -#include - -#include "TestGetPlugin.hpp" -#include -#include -#include -#include -#include - -using namespace cl::sycl; - -namespace pi { -class CudaEventTests : public ::testing::Test { -protected: - detail::plugin plugin = pi::initializeAndGet(backend::cuda); - - pi_platform _platform; - pi_context _context; - pi_queue _queue; - pi_device _device; - - CudaEventTests() : _context{nullptr}, _queue{nullptr}, _device{nullptr} {} - - ~CudaEventTests() override = default; - - void SetUp() override { - pi_uint32 numPlatforms = 0; - ASSERT_EQ(plugin.getBackend(), backend::cuda); - - ASSERT_EQ((plugin.call_nocheck( - 0, nullptr, &numPlatforms)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin.call_nocheck( - numPlatforms, &_platform, nullptr)), - PI_SUCCESS) - << "piPlatformsGet failed.\n"; - - ASSERT_EQ((plugin.call_nocheck( - _platform, PI_DEVICE_TYPE_GPU, 1, &_device, nullptr)), - PI_SUCCESS); - - ASSERT_EQ((plugin.call_nocheck( - nullptr, 1, &_device, nullptr, nullptr, &_context)), - PI_SUCCESS); - - ASSERT_EQ((plugin.call_nocheck( - _context, _device, 0, &_queue)), - PI_SUCCESS); - } - - void TearDown() override { - plugin.call(_queue); - plugin.call(_context); - } -}; - -TEST_F(CudaEventTests, PICreateEvent) { - - pi_event foo; - ASSERT_EQ( - (plugin.call_nocheck(_context, &foo)), - PI_SUCCESS); - ASSERT_NE(foo, nullptr); - // There is no CUDA interop event for user events - EXPECT_EQ(foo->get(), nullptr); - ASSERT_EQ((plugin.call_nocheck(foo)), - PI_SUCCESS); -} - -TEST_F(CudaEventTests, piGetInfoNativeEvent) { - - auto foo = _pi_event::make_native(PI_COMMAND_TYPE_NDRANGE_KERNEL, _queue); - ASSERT_NE(foo, nullptr); - - pi_event_status paramValue = {}; - size_t retSize = 0u; - ASSERT_EQ((plugin.call_nocheck( - foo, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(paramValue), - ¶mValue, &retSize)), - PI_SUCCESS); - EXPECT_EQ(retSize, sizeof(pi_int32)); - EXPECT_EQ(paramValue, PI_EVENT_SUBMITTED); - - auto cuEvent = foo->get(); - ASSERT_NE(cuEvent, nullptr); - - auto errCode = cuEventQuery(cuEvent); - ASSERT_EQ(errCode, CUDA_SUCCESS); - - ASSERT_EQ((plugin.call_nocheck(foo)), - PI_SUCCESS); -} -} // namespace pi diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp index 50882ab2731aa..244033757fa1c 100644 --- a/sycl/unittests/pi/cuda/test_primary_context.cpp +++ b/sycl/unittests/pi/cuda/test_primary_context.cpp @@ -32,7 +32,7 @@ struct CudaPrimaryContextTests : public ::testing::Test { const std::string platformVersion = platform.get_info(); // If using PI_CUDA, don't accept a non-CUDA device - return platformVersion.find("CUDA") != std::string::npos; + return platformVersion.find("CUDA BACKEND") != std::string::npos; } class cuda_device_selector : public device_selector {