From 777cd8ef71c839f466d63257400508e4d43d9e33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20=C5=BDu=C5=BEek?= Date: Tue, 25 Feb 2020 11:21:44 +0000 Subject: [PATCH] Moved PI files to own folder Removed SYCL detail includes for PI headers Removed SYCL dependencies from CMake Basic CMake setup for plugins Updated OpenCL in CMake Use higher level CMake Independent PIAPI static library Better package Moved headers to `pi` folder Made plugin class public Some clang-format Enable dynamic linking Basic testing Fixed up CUDA plugin CUDA clang-format Simpler CMake for OpenCL plugin Don't link test with plugin Fixed Windows plugin names Proper test dependency tracking Export `piPluginInit` Put all libraries in same folder Made the test more generic Align CMake requirements with LLVM Set up paths for the test Use `CUDA_CUDA_LIBRARY` Updated PI paths in SYCL Structured CMake file Generate basic config package Fixed reference capture bug Removed `piapiConfigVersion.cmake` Packaging improvements Fixed include headers for package Basic level0 plugin Removed SYCL context dependency `contextSetExtendedDeleter` is never used Use `PIAPI_EXPORT` Moved backends enum into `pi.hpp` Compilable `pi.cpp` Build level0 only when SYCL_BUILD_PI_LEVEL_ZERO Removed awkward symbol visibility setting Link threads Doesn't seem to be enough ... Replaced call_once with static initialization Always set CL_TARGET_OPENCL_VERSION Level 0 compiles CUDA plugin compiles Properly include PI Initial attempt at external project Removed extension header External project compiles Full OpenCL include piapi::piapi Basic includes in RT Fix checkPiResult CMake integration done Returned cude_definitions.hpp Set piapi library as a byproduct Moved GlobalPlugin into `plugin.hpp` PI wrapper headers Forward declare `pi::plugin` Include `pi_cuda` Enabled level0 in SYCL Typo fix Returned `__SYCL_EXPORT` Missed `pi.def` include Link `sycl-ls` with `piapi` library Use `pi::backend` Fixed doc headers Reverted some CUDA changes Reverted some namespace changes Moved `pi_sycl.hpp` to detail SYCL Link with CUDA Try __PI_EXPORT__ Missing pi_cuda.h include Reverted removed CUDA includes DPCPP_INTEGRATION Install into LLVM directory `PI_DPCPP_INTEGRATION` Removed PI ld-version-script Better level0 packaging get_link_library_path Level0 fully linked Documentation for get_library_path XPTI support PI configuration hooks Fix for test environment Better options for toggling plugins Bump PI version to 2.0 Use folders in IDEs Install plugins Moved piapi CMake into new file Use RPATH for test Rebase fix Windows fixes for CUDA plugin LLVM copyright header for PI test Use add_subdirectory for piapi --- sycl/CMakeLists.txt | 49 ++- sycl/cmake/modules/AddSYCLExecutable.cmake | 13 +- sycl/cmake/modules/AddSYCLUnitTest.cmake | 9 +- sycl/doc/PluginInterface.md | 6 +- sycl/include/CL/sycl/backend/cuda.hpp | 3 +- sycl/include/CL/sycl/backend_types.hpp | 21 +- sycl/include/CL/sycl/detail/common.hpp | 18 +- .../CL/sycl/detail/device_binary_image.hpp | 2 +- sycl/include/CL/sycl/detail/device_filter.hpp | 43 +- sycl/include/CL/sycl/detail/helpers.hpp | 2 +- sycl/include/CL/sycl/detail/pi_sycl.hpp | 29 ++ .../include/CL/sycl/detail/sycl_mem_obj_i.hpp | 2 +- .../include/CL/sycl/detail/sycl_mem_obj_t.hpp | 7 +- sycl/include/CL/sycl/exception.hpp | 2 +- sycl/include/CL/sycl/info/info_desc.hpp | 13 +- sycl/include/CL/sycl/interop_handle.hpp | 2 +- sycl/piapi.cmake | 36 ++ sycl/piapi/CMakeLists.txt | 165 ++++++++ sycl/piapi/include/pi/device_filter.hpp | 46 +++ .../sycl/detail => piapi/include/pi}/pi.def | 0 .../CL/sycl/detail => piapi/include/pi}/pi.h | 28 +- .../sycl/detail => piapi/include/pi}/pi.hpp | 92 +++-- .../include/pi/pi_cuda.h} | 2 +- .../detail => piapi/include/pi}/plugin.hpp | 68 ++-- sycl/piapi/piapiConfig.cmake.in | 6 + sycl/piapi/plugins/CMakeLists.txt | 34 ++ sycl/piapi/plugins/cuda/CMakeLists.txt | 53 +++ sycl/{ => piapi}/plugins/cuda/pi_cuda.cpp | 369 ++++++++---------- sycl/{ => piapi}/plugins/cuda/pi_cuda.hpp | 15 +- sycl/piapi/plugins/level_zero/CMakeLists.txt | 109 ++++++ .../plugins/level_zero/pi_level_zero.cpp | 4 +- .../plugins/level_zero/pi_level_zero.hpp | 6 +- .../plugins/level_zero/usm_allocator.cpp | 0 .../plugins/level_zero/usm_allocator.hpp | 0 sycl/piapi/plugins/opencl/CMakeLists.txt | 36 ++ sycl/{ => piapi}/plugins/opencl/pi_opencl.cpp | 6 +- sycl/piapi/plugins/test.cpp | 74 ++++ sycl/{source/detail => piapi/src}/pi.cpp | 126 +++--- sycl/piapi/src/plugin_pi_hooks.cpp | 24 ++ .../{source/detail => piapi/src}/posix_pi.cpp | 8 - sycl/piapi/src/shim.cpp | 0 .../detail => piapi/src}/windows_pi.cpp | 10 +- sycl/plugins/CMakeLists.txt | 8 - sycl/plugins/cuda/CMakeLists.txt | 51 --- sycl/plugins/ld-version-script.txt | 10 - sycl/plugins/level_zero/CMakeLists.txt | 116 ------ sycl/plugins/opencl/CMakeLists.txt | 59 --- sycl/source/CMakeLists.txt | 11 +- sycl/source/backend/level_zero.cpp | 2 +- sycl/source/backend/opencl.cpp | 2 +- sycl/source/context.cpp | 7 +- sycl/source/detail/config.hpp | 2 +- sycl/source/detail/context_impl.cpp | 5 +- sycl/source/detail/context_impl.hpp | 2 +- sycl/source/detail/device_binary_image.cpp | 2 +- sycl/source/detail/device_filter.cpp | 10 +- sycl/source/detail/device_impl.hpp | 2 +- sycl/source/detail/device_info.hpp | 4 +- .../detail/error_handling/enqueue_kernel.cpp | 4 +- .../detail/error_handling/error_handling.hpp | 2 +- sycl/source/detail/event_impl.cpp | 9 +- sycl/source/detail/event_impl.hpp | 9 +- sycl/source/detail/event_info.hpp | 2 +- sycl/source/detail/global_handler.cpp | 12 +- sycl/source/detail/global_handler.hpp | 8 +- sycl/source/detail/kernel_impl.hpp | 2 +- sycl/source/detail/kernel_info.hpp | 2 +- sycl/source/detail/kernel_program_cache.cpp | 2 +- sycl/source/detail/kernel_program_cache.hpp | 2 +- sycl/source/detail/pi_hooks.cpp | 44 +++ sycl/source/detail/platform_impl.cpp | 1 + sycl/source/detail/platform_impl.hpp | 12 +- sycl/source/detail/platform_info.hpp | 4 +- sycl/source/detail/plugin_sycl.hpp | 23 ++ sycl/source/detail/program_impl.cpp | 2 +- .../program_manager/program_manager.hpp | 2 +- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 29 +- sycl/source/detail/spec_constant_impl.cpp | 2 +- sycl/source/detail/sycl_mem_obj_t.cpp | 2 +- sycl/source/detail/usm/usm_impl.cpp | 4 +- sycl/source/device.cpp | 1 + sycl/source/event.cpp | 2 +- sycl/source/interop_handler.cpp | 2 +- sycl/source/platform.cpp | 1 + sycl/tools/CMakeLists.txt | 13 +- sycl/tools/sycl-ls/CMakeLists.txt | 1 + sycl/unittests/helpers/PiMock.hpp | 4 +- sycl/unittests/kernel-and-program/Cache.cpp | 2 +- sycl/unittests/pi/BackendString.hpp | 6 +- sycl/unittests/pi/EnqueueMemTest.cpp | 4 +- sycl/unittests/pi/PlatformTest.cpp | 4 +- sycl/unittests/pi/TestGetPlugin.hpp | 4 +- sycl/unittests/pi/cuda/test_base_objects.cpp | 8 +- sycl/unittests/pi/cuda/test_commands.cpp | 6 +- sycl/unittests/pi/cuda/test_device.cpp | 6 +- .../pi/cuda/test_interop_get_native.cpp | 1 + sycl/unittests/pi/cuda/test_kernels.cpp | 6 +- sycl/unittests/pi/cuda/test_mem_obj.cpp | 8 +- .../pi/cuda/test_primary_context.cpp | 2 +- sycl/unittests/pi/cuda/test_queue.cpp | 8 +- .../pi/cuda/test_sampler_properties.cpp | 5 +- 103 files changed, 1285 insertions(+), 823 deletions(-) create mode 100644 sycl/include/CL/sycl/detail/pi_sycl.hpp create mode 100644 sycl/piapi.cmake create mode 100644 sycl/piapi/CMakeLists.txt create mode 100644 sycl/piapi/include/pi/device_filter.hpp rename sycl/{include/CL/sycl/detail => piapi/include/pi}/pi.def (100%) rename sycl/{include/CL/sycl/detail => piapi/include/pi}/pi.h (99%) rename sycl/{include/CL/sycl/detail => piapi/include/pi}/pi.hpp (87%) rename sycl/{include/CL/sycl/detail/cuda_definitions.hpp => piapi/include/pi/pi_cuda.h} (92%) rename sycl/{source/detail => piapi/include/pi}/plugin.hpp (63%) create mode 100644 sycl/piapi/piapiConfig.cmake.in create mode 100644 sycl/piapi/plugins/CMakeLists.txt create mode 100644 sycl/piapi/plugins/cuda/CMakeLists.txt rename sycl/{ => piapi}/plugins/cuda/pi_cuda.cpp (93%) rename sycl/{ => piapi}/plugins/cuda/pi_cuda.hpp (99%) create mode 100755 sycl/piapi/plugins/level_zero/CMakeLists.txt rename sycl/{ => piapi}/plugins/level_zero/pi_level_zero.cpp (99%) rename sycl/{ => piapi}/plugins/level_zero/pi_level_zero.hpp (99%) rename sycl/{ => piapi}/plugins/level_zero/usm_allocator.cpp (100%) rename sycl/{ => piapi}/plugins/level_zero/usm_allocator.hpp (100%) create mode 100644 sycl/piapi/plugins/opencl/CMakeLists.txt rename sycl/{ => piapi}/plugins/opencl/pi_opencl.cpp (99%) create mode 100644 sycl/piapi/plugins/test.cpp rename sycl/{source/detail => piapi/src}/pi.cpp (87%) create mode 100644 sycl/piapi/src/plugin_pi_hooks.cpp rename sycl/{source/detail => piapi/src}/posix_pi.cpp (81%) create mode 100644 sycl/piapi/src/shim.cpp rename sycl/{source/detail => piapi/src}/windows_pi.cpp (79%) delete mode 100644 sycl/plugins/CMakeLists.txt delete mode 100644 sycl/plugins/cuda/CMakeLists.txt delete mode 100644 sycl/plugins/ld-version-script.txt delete mode 100755 sycl/plugins/level_zero/CMakeLists.txt delete mode 100644 sycl/plugins/opencl/CMakeLists.txt create mode 100644 sycl/source/detail/pi_hooks.cpp create mode 100644 sycl/source/detail/plugin_sycl.hpp diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 9ed961c0904f0..ca46824df2a5b 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -121,15 +121,43 @@ else() ) endif() +if(MSVC) + set(shared_library_dirname "bin") +else() + set(shared_library_dirname "lib${LLVM_LIBDIR_SUFFIX}") +endif() + +# Retrieves the name of the library +# For example: +# get_library_path(libname "OpenCL" "external" SHARED) +# would likely produce (depending on the system): +# set(libname "external/libOpenCL.so") +function(get_library_path out name directory library_type) + set(library_prefix "${CMAKE_${library_type}_LIBRARY_PREFIX}") + set(library_suffix "${CMAKE_${library_type}_LIBRARY_SUFFIX}") + if(NOT ("${directory}" STREQUAL "")) + set(directory "${directory}/") + endif() + set(${out} + "${directory}${library_prefix}${name}${library_suffix}" PARENT_SCOPE) +endfunction() + +# Retrieves the name of the library needed when linking +# See get_library_path +function(get_link_library_path out name directory) + if(MSVC) + get_library_path(tmp_out "${name}" "${directory}" STATIC) + else() + get_library_path(tmp_out "${name}" "${directory}" SHARED) + endif() + set(${out} "${tmp_out}" PARENT_SCOPE) +endfunction() + if( NOT OpenCL_LIBRARIES ) message(STATUS "OpenCL_LIBRARIES is missing. Will try to download OpenCL ICD Loader from github.com") + get_link_library_path(OpenCL_LIBRARIES "OpenCL" "${LLVM_LIBRARY_OUTPUT_INTDIR}") if(MSVC) - set(OpenCL_LIBRARIES - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}OpenCL${CMAKE_STATIC_LIBRARY_SUFFIX}") list(APPEND AUX_CMAKE_FLAGS -DOPENCL_ICD_LOADER_REQUIRE_WDK=OFF) - else() - set(OpenCL_LIBRARIES - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}OpenCL${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() if (CMAKE_C_COMPILER) list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) @@ -260,6 +288,9 @@ if (LLVM_ENABLE_ASSERTIONS AND NOT SYCL_DISABLE_STL_ASSERTIONS) endif() endif() +include(piapi.cmake) +add_piapi_library() + set(SYCL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # SYCL runtime library @@ -311,9 +342,6 @@ option(SYCL_INCLUDE_TESTS "Generate build targets for the SYCL unit tests." ${LLVM_INCLUDE_TESTS}) -# Plugin Library -add_subdirectory( plugins ) - add_subdirectory(tools) if(SYCL_INCLUDE_TESTS) @@ -362,6 +390,11 @@ if(SYCL_BUILD_PI_CUDA) "CUDA support requires adding \"libclc\" to the CMake argument \"LLVM_ENABLE_PROJECTS\"") endif() + find_package(CUDA 10.1 REQUIRED) + + set_target_properties(pi_cuda PROPERTIES + INTERFACE_LINK_LIBRARIES cudadrv + ) add_dependencies(sycl-toolchain libspirv-builtins pi_cuda) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_cuda) endif() diff --git a/sycl/cmake/modules/AddSYCLExecutable.cmake b/sycl/cmake/modules/AddSYCLExecutable.cmake index e30463e4b79c4..12a97450b402a 100644 --- a/sycl/cmake/modules/AddSYCLExecutable.cmake +++ b/sycl/cmake/modules/AddSYCLExecutable.cmake @@ -2,7 +2,7 @@ macro(add_sycl_executable ARG_TARGET_NAME) cmake_parse_arguments(ARG "" "" - "OPTIONS;SOURCES;LIBRARIES;DEPENDANTS" + "OPTIONS;SOURCES;LIBRARIES;STATIC_LIBS;DEPENDANTS" ${ARGN}) set(CXX_COMPILER clang++) @@ -11,6 +11,7 @@ macro(add_sycl_executable ARG_TARGET_NAME) set(LIB_POSTFIX ".lib") else() set(LIB_PREFIX "-l") + set(SPLIT_LINK_PATH ON) endif() set(DEVICE_COMPILER_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CXX_COMPILER}) @@ -18,6 +19,16 @@ macro(add_sycl_executable ARG_TARGET_NAME) foreach(_lib ${ARG_LIBRARIES}) list(APPEND LINKED_LIBS "${LIB_PREFIX}${_lib}${LIB_POSTFIX}") endforeach() + foreach(_lib ${ARG_STATIC_LIBS}) + if(SPLIT_LINK_PATH) + # Note this has to be added separately so that CMake doesn't get confused + # by the space in between the two arguments + list(APPEND LINKED_LIBS "-L$") + list(APPEND LINKED_LIBS "-l:$") + else() + list(APPEND LINKED_LIBS $) + endif() + endforeach() if (LLVM_ENABLE_ASSERTIONS AND NOT SYCL_DISABLE_STL_ASSERTIONS) if(SYCL_USE_LIBCXX) diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index c2ddac290601d..9c84303818731 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -90,12 +90,19 @@ macro(add_sycl_unittest_with_device test_dirname link_variant) endif() if ("${link_variant}" MATCHES "OBJECT") + # TODO piapi integration should be fixed, + # replace it with target_link_libraries(${test_dirname} PRIVATE piapi::piapi) + # once add_sycl_executable supports that + set(pi_include_dir "${sycl_inc_dir}/../piapi/include") add_sycl_executable(${test_dirname} - OPTIONS -nolibsycl ${COMMON_OPTS} ${LLVM_PTHREAD_LIB} ${TERMINFO_LIB} + OPTIONS -nolibsycl ${COMMON_OPTS} ${LLVM_PTHREAD_LIB} ${TERMINFO_LIB} ${piapi_options} -I "${pi_include_dir}" -DPI_DPCPP_INTEGRATION SOURCES ${ARGN} $ LIBRARIES gtest_main gtest LLVMSupport LLVMTestingSupport OpenCL ${EXTRA_LIBS} + STATIC_LIBS piapi DEPENDANTS SYCLUnitTests) + add_dependencies(${test_dirname}_exec piapi) else() # TODO support shared library case. endif() + #target_link_libraries(${test_dirname}_exec PRIVATE piapi::piapi) endmacro() diff --git a/sycl/doc/PluginInterface.md b/sycl/doc/PluginInterface.md index 99c2626b89d47..730db708826fc 100644 --- a/sycl/doc/PluginInterface.md +++ b/sycl/doc/PluginInterface.md @@ -72,8 +72,8 @@ SYCL_PI_TRACE=-1 lists all PI Traces above and more debug messages. #### Plugin binary interface Plugins should implement all the Interface APIs required for the PI Version -it supports. There is [pi.def](../include/CL/sycl/detail/pi.def)/ -[pi.h](../include/CL/sycl/detail/pi.h) file listing all PI API names that can be +it supports. There is [pi.def](../piapi/include/pi/pi.def)/ +[pi.h](../piapi/include/pi/pi.h) file listing all PI API names that can be called by the specific version of Plugin Interface. It exports a function - "piPluginInit" that returns the plugin details and function pointer table containing the list of pointers to implemented Interface @@ -136,7 +136,7 @@ into - **Interoperability API** which allows interoperability with underlying runtimes such as OpenCL. -See [pi.h](../include/CL/sycl/detail/pi.h) header for the full list and +See [pi.h](../piapi/include/pi/pi.h) header for the full list and descriptions of PI APIs. ### The Core OpenCL-based PI APIs diff --git a/sycl/include/CL/sycl/backend/cuda.hpp b/sycl/include/CL/sycl/backend/cuda.hpp index 0bc3fa6f4b0aa..3e1557697f0bb 100644 --- a/sycl/include/CL/sycl/backend/cuda.hpp +++ b/sycl/include/CL/sycl/backend/cuda.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -58,4 +59,4 @@ struct interop +#include #include #include #include #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { - -enum class backend : char { host, opencl, level_zero, cuda, all }; - -template struct interop; - +namespace pi { inline std::ostream &operator<<(std::ostream &Out, backend be) { switch (be) { case backend::host: @@ -41,6 +36,18 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) { } return Out; } +} // namespace pi + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +using backend = pi::backend; + +template struct interop; + +inline std::ostream &operator<<(std::ostream &Out, backend be) { + return pi::operator<<(Out, be); +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/common.hpp b/sycl/include/CL/sycl/detail/common.hpp index b8b2fb68e1400..36d7afa82f73f 100644 --- a/sycl/include/CL/sycl/detail/common.hpp +++ b/sycl/include/CL/sycl/detail/common.hpp @@ -20,6 +20,14 @@ #define __SYCL_STRINGIFY_LINE_HELP(s) #s #define __SYCL_STRINGIFY_LINE(s) __SYCL_STRINGIFY_LINE_HELP(s) +#ifdef XPTI_ENABLE_INSTRUMENTATION +namespace pi { +// We define a sycl stream name and this will be used by the instrumentation +// framework +extern const char *SYCL_STREAM_NAME; +} // namespace pi +#endif // XPTI_ENABLE_INSTRUMENTATION + // Default signature enables the passing of user code location information to // public methods as a default argument. If the end-user wants to disable the // code location information, they must compile the code with @@ -27,11 +35,11 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { -// We define a sycl stream name and this will be used by the instrumentation -// framework -constexpr const char *SYCL_STREAM_NAME = "sycl"; -// Stream name being used for traces generated from the SYCL plugin layer -constexpr const char *SYCL_PICALL_STREAM_NAME = "sycl.pi"; + +#ifdef XPTI_ENABLE_INSTRUMENTATION +using pi::SYCL_STREAM_NAME; +#endif // XPTI_ENABLE_INSTRUMENTATION + // Data structure that captures the user code location information using the // builtin capabilities of the compiler struct code_location { diff --git a/sycl/include/CL/sycl/detail/device_binary_image.hpp b/sycl/include/CL/sycl/detail/device_binary_image.hpp index a74523801175d..6e458fa5547de 100644 --- a/sycl/include/CL/sycl/detail/device_binary_image.hpp +++ b/sycl/include/CL/sycl/detail/device_binary_image.hpp @@ -8,7 +8,7 @@ #pragma once #include -#include +#include #include diff --git a/sycl/include/CL/sycl/detail/device_filter.hpp b/sycl/include/CL/sycl/detail/device_filter.hpp index b65cf709d9dc0..88a74ac126070 100644 --- a/sycl/include/CL/sycl/detail/device_filter.hpp +++ b/sycl/include/CL/sycl/detail/device_filter.hpp @@ -11,44 +11,16 @@ #include #include #include +#include #include #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { - -struct device_filter { - backend Backend = backend::all; - info::device_type DeviceType = info::device_type::all; - int DeviceNum = 0; - bool HasBackend = false; - bool HasDeviceType = false; - bool HasDeviceNum = false; - int MatchesSeen = 0; - - device_filter(){}; - device_filter(const std::string &FilterString); - friend std::ostream &operator<<(std::ostream &Out, - const device_filter &Filter); -}; - -class device_filter_list { - std::vector FilterList; - -public: - device_filter_list() {} - device_filter_list(const std::string &FilterString); - device_filter_list(device_filter &Filter); - void addFilter(device_filter &Filter); - std::vector &get() { return FilterList; } - friend std::ostream &operator<<(std::ostream &Out, - const device_filter_list &List); -}; +namespace pi { inline std::ostream &operator<<(std::ostream &Out, const device_filter &Filter) { + namespace info = cl::sycl::info; Out << Filter.Backend << ":"; if (Filter.DeviceType == info::device_type::host) { Out << "host"; @@ -78,6 +50,15 @@ inline std::ostream &operator<<(std::ostream &Out, return Out; } +} // namespace pi + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +using pi::device_filter; +using pi::device_filter_list; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/helpers.hpp b/sycl/include/CL/sycl/detail/helpers.hpp index c6454ed01151e..d4b8efbe674a8 100644 --- a/sycl/include/CL/sycl/detail/helpers.hpp +++ b/sycl/include/CL/sycl/detail/helpers.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #if __cpp_lib_bit_cast diff --git a/sycl/include/CL/sycl/detail/pi_sycl.hpp b/sycl/include/CL/sycl/detail/pi_sycl.hpp new file mode 100644 index 0000000000000..3ac6998d51ca8 --- /dev/null +++ b/sycl/include/CL/sycl/detail/pi_sycl.hpp @@ -0,0 +1,29 @@ +//==---------------- pi_sycl.hpp - SYCL wrapper for PI ---------*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#pragma once + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +namespace RT = ::pi; + +namespace detail { + +namespace RT = ::pi; +using PiApiKind = ::PiApiKind; +namespace pi { +using namespace ::pi; +} + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp index 274f0511a5361..80e37c135fca4 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp index 22ebb8ee97109..f305d6c1dee86 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp @@ -22,6 +22,10 @@ #include #include +namespace pi { +class plugin; +} + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -29,7 +33,6 @@ namespace detail { // Forward declarations class context_impl; class event_impl; -class plugin; using ContextImplPtr = shared_ptr_class; using EventImplPtr = shared_ptr_class; @@ -86,7 +89,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { virtual ~SYCLMemObjT() = default; - const plugin &getPlugin() const; + const pi::plugin &getPlugin() const; __SYCL_DLL_LOCAL size_t getSize() const override { return MSizeInBytes; } __SYCL_DLL_LOCAL size_t get_count() const { diff --git a/sycl/include/CL/sycl/exception.hpp b/sycl/include/CL/sycl/exception.hpp index 0004bc311a4cd..007d8555e467a 100644 --- a/sycl/include/CL/sycl/exception.hpp +++ b/sycl/include/CL/sycl/exception.hpp @@ -12,8 +12,8 @@ #include #include -#include #include +#include #include diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index a1c368cdc5519..6a0ff02b123f8 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { @@ -133,16 +133,7 @@ enum class device : cl_device_info { usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT }; -enum class device_type : pi_uint64 { - cpu = PI_DEVICE_TYPE_CPU, - gpu = PI_DEVICE_TYPE_GPU, - accelerator = PI_DEVICE_TYPE_ACC, - // TODO: figure out if we need all the below in PI - custom = CL_DEVICE_TYPE_CUSTOM, - automatic, - host, - all = CL_DEVICE_TYPE_ALL -}; +using device_type = pi::device_type; enum class partition_property : cl_device_partition_property { no_partition = 0, diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index a09d8cee7ccaa..c3235e55b28e4 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include diff --git a/sycl/piapi.cmake b/sycl/piapi.cmake new file mode 100644 index 0000000000000..b643a68304cc5 --- /dev/null +++ b/sycl/piapi.cmake @@ -0,0 +1,36 @@ +set(xpti_include_dir "") +if(SYCL_ENABLE_XPTI_TRACING) + set(xpti_include_dir "${LLVM_EXTERNAL_XPTI_SOURCE_DIR}/include") +endif() + +# Set values for the library +set(PI_BUILD_CUDA "${SYCL_BUILD_PI_CUDA}") +set(PI_BUILD_LEVEL_ZERO ON) +set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE "${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE}") +set(PI_LIBDIR_SUFFIX "${LLVM_LIBDIR_SUFFIX}") +set(PI_XPTI_INCLUDE_DIR "${xpti_include_dir}") + +# Include the library +add_subdirectory(piapi) + +# Enables the piapi library +function(add_piapi_library) + add_library(piapi::piapi ALIAS piapi) +endfunction() + +# PI has special code for DPC++ integration +target_compile_definitions(piapi INTERFACE PI_DPCPP_INTEGRATION) +target_include_directories(piapi + INTERFACE + "${sycl_inc_dir}" +) + +if (MSVC AND CMAKE_BUILD_TYPE MATCHES "Debug") + set(XPTI_LIB xptid) +else() + set(XPTI_LIB xpti) +endif() +if(SYCL_ENABLE_XPTI_TRACING) + #target_link_libraries(pi_level_zero PRIVATE ${XPTI_LIB}) +endif() + diff --git a/sycl/piapi/CMakeLists.txt b/sycl/piapi/CMakeLists.txt new file mode 100644 index 0000000000000..812f891404229 --- /dev/null +++ b/sycl/piapi/CMakeLists.txt @@ -0,0 +1,165 @@ +cmake_minimum_required(VERSION 3.4) +project(piapi VERSION 2.0 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +include(ExternalProject) +find_package(Threads REQUIRED) + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +################################################################################ +# Options + +# TODO: Not possible to turn this off at the moment +option(PI_OPENCL_HEADERS "Should OpenCL headers be included" ON) + +option(PI_BUILD_CUDA "Whether to build the pi_cuda plugin" OFF) +option(PI_BUILD_LEVEL_ZERO "Whether to build the pi_level_zero plugin" OFF) + +set(PI_LIBDIR_SUFFIX "" CACHE STRING "Library suffix when installing") +set(PI_XPTI_INCLUDE_DIR "" CACHE STRING "Location of the XPTI include directory") + +################################################################################ +# Main library + +set(pi_include_dir "${PROJECT_SOURCE_DIR}/include") +set(pi_source_dir "${PROJECT_SOURCE_DIR}/src") +set(pi_library_base_dir "${CMAKE_CURRENT_BINARY_DIR}") +set(pi_library_output_dir "${pi_library_base_dir}/lib${PI_LIBDIR_SUFFIX}") +set(pi_binary_output_dir "${pi_library_base_dir}/bin") + +set(CONF_INCLUDE_DIRS "") + +add_library(piapi STATIC + "${pi_include_dir}/pi/pi.h" + "${pi_include_dir}/pi/pi.hpp" + "${pi_source_dir}/pi.cpp" + "$<$:${pi_source_dir}/windows_pi.cpp>" + "$<$,$>:${pi_source_dir}/posix_pi.cpp>" +) +set_target_properties(piapi PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" +) +list(APPEND CONF_INCLUDE_DIRS "${pi_include_dir}") +list(APPEND CONF_INCLUDE_DIRS "${CMAKE_CURRENT_BINARY_DIR}") +target_include_directories(piapi PUBLIC + "$" + "$" +) +target_include_directories(piapi INTERFACE + "$" +) +target_link_libraries(piapi PUBLIC ${CMAKE_DL_LIBS}) +target_link_libraries(piapi PUBLIC Threads::Threads) +target_compile_definitions(piapi PUBLIC CL_TARGET_OPENCL_VERSION=210) + +################################################################################ +# Export header + +set(CMAKE_CXX_VISIBILITY_PRESET hidden) +set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) + +# Define export header for the plugins +# We don't have the plugins yet, so we rely on a shim shared library +set(pi_export_header "${CMAKE_CURRENT_BINARY_DIR}/piapi_export.h") +add_library(pi_export_library_shim EXCLUDE_FROM_ALL SHARED "${pi_source_dir}/shim.cpp") +include(GenerateExportHeader) +generate_export_header(pi_export_library_shim + BASE_NAME PIAPI + EXPORT_MACRO_NAME PIAPI_EXPORT + EXPORT_FILE_NAME "${pi_export_header}" + STATIC_DEFINE PIAPI_STATIC +) + +################################################################################ +# OpenCL + +if(PI_OPENCL_HEADERS) + find_package(OpenCL REQUIRED) + + # Do not link against the OpenCL::OpenCL library, just need the headers + target_include_directories(piapi PUBLIC + "${OpenCL_INCLUDE_DIR}" + ) +endif() + +################################################################################ +# XPTI tracing + +if(PI_XPTI_INCLUDE_DIR) + target_compile_definitions(piapi PRIVATE XPTI_ENABLE_INSTRUMENTATION=1) + target_include_directories(piapi PUBLIC "${PI_XPTI_INCLUDE_DIR}") +endif() + +################################################################################ +# Install + +# Export the package for use from the build-tree +export(PACKAGE piapi) + +install(DIRECTORY "${pi_include_dir}" + DESTINATION . COMPONENT piapi +) +install(TARGETS piapi + EXPORT piapiTargets + ARCHIVE DESTINATION "lib${PI_LIBDIR_SUFFIX}" COMPONENT piapi + RUNTIME DESTINATION "bin" COMPONENT piapi +) + +################################################################################ +# Interface library that simplifies exporting symbols for the plugins + +add_library(pi_export_library INTERFACE) +target_include_directories(pi_export_library INTERFACE "${pi_library_output_dir}") +target_compile_definitions(pi_export_library INTERFACE + pi_export_library_shim_EXPORTS + PI_REQUIRES_EXPORT +) +target_sources(pi_export_library INTERFACE + "${pi_source_dir}/plugin_pi_hooks.cpp" +) + +################################################################################ +# Tests + +enable_testing() + +################################################################################ +# Plugins + +add_subdirectory(plugins) + +################################################################################ +# Package + +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/piapiConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY ExactVersion +) + +configure_file(piapiConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/piapiConfig.cmake + COPYONLY +) + +set(config_package_location lib/cmake/piapi) +install(EXPORT piapiTargets + FILE piapiTargets.cmake + NAMESPACE piapi:: + DESTINATION ${config_package_location} + COMPONENT piapi +) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/piapiConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/piapiConfigVersion.cmake + DESTINATION ${config_package_location} + COMPONENT piapi +) diff --git a/sycl/piapi/include/pi/device_filter.hpp b/sycl/piapi/include/pi/device_filter.hpp new file mode 100644 index 0000000000000..16f573aab7b82 --- /dev/null +++ b/sycl/piapi/include/pi/device_filter.hpp @@ -0,0 +1,46 @@ +//==---------- pi_device_filter.hpp - PI device filter ---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "pi.hpp" + +#include +#include + +namespace pi { + +struct device_filter { + backend Backend = backend::all; + pi::device_type DeviceType = pi::device_type::all; + int DeviceNum = 0; + bool HasBackend = false; + bool HasDeviceType = false; + bool HasDeviceNum = false; + int MatchesSeen = 0; + + device_filter(){}; + device_filter(const std::string &FilterString); + friend std::ostream &operator<<(std::ostream &Out, + const device_filter &Filter); +}; + +class device_filter_list { + std::vector FilterList; + +public: + device_filter_list() {} + device_filter_list(const std::string &FilterString); + device_filter_list(device_filter &Filter); + void addFilter(device_filter &Filter); + std::vector &get() { return FilterList; } + friend std::ostream &operator<<(std::ostream &Out, + const device_filter_list &List); +}; + +} // namespace pi diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/piapi/include/pi/pi.def similarity index 100% rename from sycl/include/CL/sycl/detail/pi.def rename to sycl/piapi/include/pi/pi.def diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/piapi/include/pi/pi.h similarity index 99% rename from sycl/include/CL/sycl/detail/pi.h rename to sycl/piapi/include/pi/pi.h index 72c2dcceb0c26..28cafc49c4346 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/piapi/include/pi/pi.h @@ -22,6 +22,20 @@ #ifndef _PI_H_ #define _PI_H_ +#ifdef PI_REQUIRES_EXPORT +#include +#else +#define PIAPI_EXPORT +#endif // PI_REQUIRES_EXPORT + +#ifdef PI_DPCPP_INTEGRATION +#include +#else +#ifndef __SYCL_EXPORT +#define __SYCL_EXPORT PIAPI_EXPORT +#endif // __SYCL_EXPORT +#endif // PI_DPCPP_INTEGRATION + // Every single change in PI API should be accompanied with the minor // version increase (+1). In the cases where backward compatibility is not // maintained there should be a (+1) change to the major version in @@ -34,9 +48,13 @@ // pi_device_binary_property_set PropertySetsBegin; // pi_device_binary_property_set PropertySetsEnd; // 2. A number of types needed to define pi_device_binary_property_set added. +// -- Version 2.0: +// * PI as a standalone library +// * Everything moved out of the cl::sycl namespace into the pi namespace +// * Global C++ objects moved into pi namespace and made external // -#define _PI_H_VERSION_MAJOR 1 -#define _PI_H_VERSION_MINOR 2 +#define _PI_H_VERSION_MAJOR 2 +#define _PI_H_VERSION_MINOR 0 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -45,9 +63,9 @@ // TODO: we need a mapping of PI to OpenCL somewhere, and this can be done // elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now // done here, for efficiency and simplicity. +#include +#include #include -#include -#include #include #ifdef __cplusplus @@ -1573,7 +1591,7 @@ struct _pi_plugin { char *Targets; struct FunctionPointers { #define _PI_API(api) decltype(::api) *api; -#include +#include } PiFunctionTable; }; diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/piapi/include/pi/pi.hpp similarity index 87% rename from sycl/include/CL/sycl/detail/pi.hpp rename to sycl/piapi/include/pi/pi.hpp index 3509f9a73cbb4..12eece332542f 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/piapi/include/pi/pi.hpp @@ -13,14 +13,12 @@ #pragma once -#include -#include -#include -#include -#include +#include #include #include +#include +#include #include #include #include @@ -32,20 +30,30 @@ struct trace_event_data_t; } #endif -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { - -class context; - -namespace detail { - enum class PiApiKind { #define _PI_API(api) api, -#include +#include }; -class plugin; + namespace pi { +enum class backend : char { host, opencl, level_zero, cuda, all }; + +enum class device_type : pi_uint64 { + cpu = PI_DEVICE_TYPE_CPU, + gpu = PI_DEVICE_TYPE_GPU, + accelerator = PI_DEVICE_TYPE_ACC, + // TODO: figure out if we need all the below in PI + custom = CL_DEVICE_TYPE_CUSTOM, + automatic, + host, + all = CL_DEVICE_TYPE_ALL +}; + +// Forward declarations +class plugin; +class device_filter_list; + // The SYCL_PI_TRACE sets what we will trace. // This is a bit-mask of various things we'd want to trace. enum TraceLevel { @@ -54,23 +62,29 @@ enum TraceLevel { PI_TRACE_ALL = -1 }; +namespace config { +extern TraceLevel trace_level_mask(); +extern pi::backend *backend(); +extern pi::device_filter_list *device_filter_list(); +} // namespace config + // Return true if we want to trace PI related activities. bool trace(TraceLevel level); -#ifdef __SYCL_RT_OS_WINDOWS -#define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" -#define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" +#ifdef _WIN32 +#define PI_OPENCL_PLUGIN_NAME "pi_opencl.dll" +#define PI_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" +#define PI_CUDA_PLUGIN_NAME "pi_cuda.dll" #else -#define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.so" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" -#define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.so" +#define PI_OPENCL_PLUGIN_NAME "libpi_opencl.so" +#define PI_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" +#define PI_CUDA_PLUGIN_NAME "libpi_cuda.so" #endif // Report error and no return (keeps compiler happy about no return statements). -[[noreturn]] __SYCL_EXPORT void die(const char *Message); +[[noreturn]] PIAPI_EXPORT void die(const char *Message); -__SYCL_EXPORT void assertion(bool Condition, const char *Message = nullptr); +PIAPI_EXPORT void assertion(bool Condition, const char *Message = nullptr); template void handleUnknownParamName(const char *functionName, T parameter) { @@ -85,8 +99,8 @@ void handleUnknownParamName(const char *functionName, T parameter) { // This macro is used to report invalid enumerators being passed to PI API // GetInfo functions. It will print the name of the function that invoked it // and the value of the unknown enumerator. -#define __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(parameter) \ - { cl::sycl::detail::pi::handleUnknownParamName(__func__, parameter); } +#define PI_HANDLE_UNKNOWN_PARAM_NAME(parameter) \ + { pi::handleUnknownParamName(__func__, parameter); } using PiPlugin = ::pi_plugin; using PiResult = ::pi_result; @@ -115,10 +129,6 @@ using PiMemObjectType = ::pi_mem_type; using PiMemImageChannelOrder = ::pi_image_channel_order; using PiMemImageChannelType = ::pi_image_channel_type; -__SYCL_EXPORT void contextSetExtendedDeleter(const cl::sycl::context &constext, - pi_context_extended_deleter func, - void *user_data); - // Function to load the shared library // Implementation is OS dependent. void *loadOsLibrary(const std::string &Library); @@ -133,13 +143,8 @@ std::string platformInfoToString(pi_platform_info info); // Want all the needed casts be explicit, do not define conversion operators. template To cast(From value); -// Holds the PluginInformation for the plugin that is bound. -// Currently a global variable is used to store OpenCL plugin information to be -// used with SYCL Interoperability Constructors. -extern std::shared_ptr GlobalPlugin; - // Performs PI one-time initialization. -const vector_class &initialize(); +const std::vector &initialize(); // Get the plugin serving given backend. template const plugin &getPlugin(); @@ -155,7 +160,7 @@ template struct PiFuncInfo {}; return MPlugin.PiFunctionTable.api; \ } \ }; -#include +#include /// Emits an XPTI trace before a PI API call is made /// \param FName The name of the PI API call @@ -388,8 +393,6 @@ PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, } // namespace pi -namespace RT = cl::sycl::detail::pi; - // Workaround for build with GCC 5.x // An explicit specialization shall be declared in the namespace block. // Having namespace as part of template name is not supported by GCC @@ -400,26 +403,19 @@ namespace pi { // operators. template inline To cast(From value) { // TODO: see if more sanity checks are possible. - RT::assertion((sizeof(From) == sizeof(To)), "assert: cast failed size check"); + pi::assertion((sizeof(From) == sizeof(To)), "assert: cast failed size check"); return (To)(value); } // These conversions should use PI interop API. template <> inline pi::PiProgram cast(cl_program) { - RT::assertion(false, "pi::cast -> use piextCreateProgramWithNativeHandle"); + pi::assertion(false, "pi::cast -> use piextCreateProgramWithNativeHandle"); return {}; } template <> inline pi::PiDevice cast(cl_device_id) { - RT::assertion(false, "pi::cast -> use piextCreateDeviceWithNativeHandle"); + pi::assertion(false, "pi::cast -> use piextCreateDeviceWithNativeHandle"); return {}; } } // namespace pi -} // namespace detail - -// For shortness of using PI from the top-level sycl files. -namespace RT = cl::sycl::detail::pi; - -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/cuda_definitions.hpp b/sycl/piapi/include/pi/pi_cuda.h similarity index 92% rename from sycl/include/CL/sycl/detail/cuda_definitions.hpp rename to sycl/piapi/include/pi/pi_cuda.h index 9af7dbdab0152..701325e7b4717 100644 --- a/sycl/include/CL/sycl/detail/cuda_definitions.hpp +++ b/sycl/piapi/include/pi/pi_cuda.h @@ -1,4 +1,4 @@ -//==------------ cuda_definitions.hpp - SYCL CUDA backend ------------------==// +//==------------ pi_cuda.h - PI CUDA backend ------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/source/detail/plugin.hpp b/sycl/piapi/include/pi/plugin.hpp similarity index 63% rename from sycl/source/detail/plugin.hpp rename to sycl/piapi/include/pi/plugin.hpp index 756047f3adc4e..d3dead22d57a6 100644 --- a/sycl/source/detail/plugin.hpp +++ b/sycl/piapi/include/pi/plugin.hpp @@ -7,22 +7,29 @@ //===----------------------------------------------------------------------===// #pragma once -#include -#include -#include -#include + +#include #ifdef XPTI_ENABLE_INSTRUMENTATION // Include the headers necessary for emitting traces using the trace framework #include "xpti_trace_framework.h" #endif -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -extern xpti::trace_event_data_t *GPICallEvent; -#endif +#define PI_CHECK_OCL_CODE_THROW_HELPER(X, EXC) \ + if (X != 0) { \ + throw EXC; \ + } + +#ifdef PI_DPCPP_INTEGRATION +#include +#else +#ifndef __SYCL_CHECK_OCL_CODE_THROW +#define __SYCL_CHECK_OCL_CODE_THROW(X, EXC) \ + PI_CHECK_OCL_CODE_THROW_HELPER(X, EXC{}) +#endif // __SYCL_CHECK_OCL_CODE_THROW +#endif // PI_DPCPP_INTEGRATION + +namespace pi { /// The plugin class provides a unified interface to the underlying low-level /// runtimes for the device-agnostic SYCL runtime. /// @@ -31,7 +38,7 @@ class plugin { public: plugin() = delete; - plugin(RT::PiPlugin Plugin, backend UseBackend) + plugin(pi::PiPlugin Plugin, backend UseBackend) : MPlugin(Plugin), MBackend(UseBackend) {} plugin &operator=(const plugin &) = default; @@ -41,14 +48,14 @@ class plugin { ~plugin() = default; - const RT::PiPlugin &getPiPlugin() const { return MPlugin; } - RT::PiPlugin &getPiPlugin() { return MPlugin; } + const pi::PiPlugin &getPiPlugin() const { return MPlugin; } + pi::PiPlugin &getPiPlugin() { return MPlugin; } /// Checks return value from PI calls. /// /// \throw Exception if pi_result is not a PI_SUCCESS. - template - void checkPiResult(RT::PiResult pi_result) const { + template + void checkPiResult(pi::PiResult pi_result) const { __SYCL_CHECK_OCL_CODE_THROW(pi_result, Exception); } @@ -63,8 +70,8 @@ class plugin { /// /// \sa plugin::checkPiResult template - RT::PiResult call_nocheck(ArgsT... Args) const { - RT::PiFuncInfo PiCallInfo; + pi::PiResult call_nocheck(ArgsT... Args) const { + pi::PiFuncInfo PiCallInfo; #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit a function_begin trace for the PI API before the call is executed. // If arguments need to be captured, then a data structure can be sent in @@ -75,12 +82,12 @@ class plugin { if (pi::trace(pi::TraceLevel::PI_TRACE_CALLS)) { std::string FnName = PiCallInfo.getFuncName(); std::cout << "---> " << FnName << "(" << std::endl; - RT::printArgs(Args...); + pi::printArgs(Args...); } - RT::PiResult R = PiCallInfo.getFuncPtr(MPlugin)(Args...); + pi::PiResult R = PiCallInfo.getFuncPtr(MPlugin)(Args...); if (pi::trace(pi::TraceLevel::PI_TRACE_CALLS)) { std::cout << ") ---> "; - RT::printArgs(R); + pi::printArgs(R); } #ifdef XPTI_ENABLE_INSTRUMENTATION // Close the function begin with a call to function end @@ -94,16 +101,27 @@ class plugin { /// \throw cl::sycl::runtime_exception if the call was not successful. template void call(ArgsT... Args) const { - RT::PiResult Err = call_nocheck(Args...); + pi::PiResult Err = call_nocheck(Args...); checkPiResult(Err); } backend getBackend(void) const { return MBackend; } private: - RT::PiPlugin MPlugin; + pi::PiPlugin MPlugin; backend MBackend; }; // class plugin -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) + +template <> +inline void +plugin::checkPiResult(pi::PiResult pi_result) const { + PI_CHECK_OCL_CODE_THROW_HELPER(pi_result, + std::runtime_error{"Invalid PIAPI call"}); +} + +// Holds the PluginInformation for the plugin that is bound. +// Currently a global variable is used to store OpenCL plugin information to be +// used with SYCL Interoperability Constructors. +extern std::shared_ptr GlobalPlugin; + +} // namespace pi diff --git a/sycl/piapi/piapiConfig.cmake.in b/sycl/piapi/piapiConfig.cmake.in new file mode 100644 index 0000000000000..7d7a67bdad996 --- /dev/null +++ b/sycl/piapi/piapiConfig.cmake.in @@ -0,0 +1,6 @@ +include(CMakeFindDependencyMacro) + +find_dependency(CUDA 10.0) +find_dependency(OpenCL) + +include(${CMAKE_CURRENT_LIST_DIR}/piapiTargets.cmake) diff --git a/sycl/piapi/plugins/CMakeLists.txt b/sycl/piapi/plugins/CMakeLists.txt new file mode 100644 index 0000000000000..555922d74513d --- /dev/null +++ b/sycl/piapi/plugins/CMakeLists.txt @@ -0,0 +1,34 @@ +set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + +add_executable(test_pi_plugins EXCLUDE_FROM_ALL + "test.cpp" +) +target_link_libraries(test_pi_plugins PRIVATE + piapi +) +add_test(NAME pi_plugins COMMAND test_pi_plugins) + +# The test assumes pi_opencl is always available +if(WIN32) + set_tests_properties(pi_plugins PROPERTIES + ENVIRONMENT "PATH=$;$ENV{PATH}" + ) +else() + set_target_properties(test_pi_plugins PROPERTIES + INSTALL_RPATH "$" + BUILD_WITH_INSTALL_RPATH ON + ) +endif() + +add_subdirectory(opencl) +add_dependencies(test_pi_plugins pi_opencl) + +if(PI_BUILD_CUDA) + add_subdirectory(cuda) + add_dependencies(test_pi_plugins pi_cuda) +endif() + +if(PI_BUILD_LEVEL_ZERO) + add_subdirectory(level_zero) + add_dependencies(test_pi_plugins pi_level_zero) +endif () diff --git a/sycl/piapi/plugins/cuda/CMakeLists.txt b/sycl/piapi/plugins/cuda/CMakeLists.txt new file mode 100644 index 0000000000000..12d30efa7ca0b --- /dev/null +++ b/sycl/piapi/plugins/cuda/CMakeLists.txt @@ -0,0 +1,53 @@ +message(STATUS "Including the PI API CUDA backend.") + + # cannot rely on cmake support for CUDA; it assumes runtime API is being used. + # we only require the CUDA driver API to be used + # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library. + +find_package(CUDA 10.1 REQUIRED) + +# Make imported library global to use it within the project. +add_library(cudadrv SHARED IMPORTED GLOBAL) + +set_target_properties(cudadrv PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} +) + +if(WIN32) + set_target_properties(cudadrv PROPERTIES + IMPORTED_IMPLIB ${CUDA_CUDA_LIBRARY} + ) +else() + set_target_properties(cudadrv PROPERTIES + IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} + ) +endif() + +add_library(pi_cuda SHARED + "${pi_include_dir}/pi/pi.h" + "${pi_include_dir}/pi/pi.hpp" + "pi_cuda.hpp" + "pi_cuda.cpp" +) +set_target_properties(pi_cuda PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" + LIBRARY_OUTPUT_DIRECTORY "${pi_library_output_dir}" + RUNTIME_OUTPUT_DIRECTORY "${pi_binary_output_dir}" +) + +target_link_libraries(pi_cuda PUBLIC piapi cudadrv) +target_link_libraries(pi_cuda PRIVATE pi_export_library) + +target_include_directories(pi_cuda PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") + +target_include_directories(pi_cuda PUBLIC ${CUDA_INCLUDE_DIRS}) + +install(TARGETS pi_cuda + EXPORT piapiTargets + LIBRARY DESTINATION "lib${PI_LIBDIR_SUFFIX}" COMPONENT pi_cuda + RUNTIME DESTINATION "bin" COMPONENT pi_cuda +) +export(TARGETS pi_cuda APPEND + FILE "${PROJECT_BINARY_DIR}/piapiTargets.cmake" +) +install(FILES "pi_cuda.hpp" DESTINATION include/pi COMPONENT headers) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/piapi/plugins/cuda/pi_cuda.cpp similarity index 93% rename from sycl/plugins/cuda/pi_cuda.cpp rename to sycl/piapi/plugins/cuda/pi_cuda.cpp index 4bcf33403a55f..c4180b3805a3d 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/piapi/plugins/cuda/pi_cuda.cpp @@ -11,10 +11,9 @@ /// /// \ingroup sycl_pi_cuda -#include -#include -#include -#include +#include "pi_cuda.hpp" +#include +#include #include #include @@ -25,6 +24,12 @@ #include #include +#if defined(__builtin_unreachable) +#define PI_BUILTIN_UNREACHABLE() __builtin_unreachable() +#else +#define PI_BUILTIN_UNREACHABLE() +#endif // defined(__builtin_unreachable) + namespace { std::string getCudaVersionString() { int driver_version = 0; @@ -238,8 +243,8 @@ pi_result getInfo(size_t param_value_size, void *param_value, int getAttribute(pi_device device, CUdevice_attribute attribute) { int value; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&value, attribute, device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&value, attribute, device->get()) == + CUDA_SUCCESS); return value; } /// \endcond @@ -275,9 +280,6 @@ void guessLocalWorkSize(int *threadsPerBlock, const size_t *global_work_size, } // anonymous namespace /// ------ Error handling, matching OpenCL plugin semantics. -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { namespace pi { // Report error and no return (keeps compiler from printing warnings). @@ -295,9 +297,6 @@ void assertion(bool Condition, const char *Message) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) //-------------- // PI object implementation @@ -406,7 +405,7 @@ pi_result _pi_event::record() { try { eventId_ = queue_->get_next_event_id(); if (eventId_ == 0) { - cl::sycl::detail::pi::die( + pi::die( "Unrecoverable program state reached in event identifier overflow"); } result = PI_CHECK_ERROR(cuEventRecord(evEnd_, cuStream)); @@ -582,8 +581,7 @@ template class ReleaseGuard { // CUDA error for which it is unclear if the function that reported it // succeeded or not. Either way, the state of the program is compromised // and likely unrecoverable. - cl::sycl::detail::pi::die( - "Unrecoverable program state reached in cuda_piMemRelease"); + pi::die("Unrecoverable program state reached in cuda_piMemRelease"); } } } @@ -617,10 +615,10 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, static pi_uint32 numPlatforms = 1; static _pi_platform platformId; - if (num_entries == 0 and platforms != nullptr) { + if ((num_entries == 0) && (platforms != nullptr)) { return PI_INVALID_VALUE; } - if (platforms == nullptr and num_platforms == nullptr) { + if ((platforms == nullptr) && (num_platforms == nullptr)) { return PI_INVALID_VALUE; } @@ -657,7 +655,7 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, throw; } }, - err); + std::ref(err)); if (num_platforms != nullptr) { *num_platforms = numPlatforms; @@ -700,9 +698,9 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform, return getInfo(param_value_size, param_value, param_value_size_ret, ""); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Platform info request not implemented"); + pi::die("Platform info request not implemented"); return {}; } @@ -759,7 +757,7 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, context->get_reference_count()); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } return PI_OUT_OF_RESOURCES; @@ -795,10 +793,10 @@ pi_result cuda_piextDeviceSelectBinary(pi_device device, pi_uint32 num_binaries, pi_uint32 *selected_binary) { if (!binaries) { - cl::sycl::detail::pi::die("No list of device images provided"); + pi::die("No list of device images provided"); } if (num_binaries < 1) { - cl::sycl::detail::pi::die("No binary images in the list"); + pi::die("No binary images in the list"); } // Look for an image for the NVPTX64 target, and return the first one that is @@ -819,8 +817,7 @@ pi_result cuda_piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *function_name, pi_uint64 *function_pointer_ret) { - cl::sycl::detail::pi::die( - "cuda_piextGetDeviceFunctionPointer not implemented"); + pi::die("cuda_piextGetDeviceFunctionPointer not implemented"); return {}; } @@ -846,11 +843,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: { int compute_units = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&compute_units, - CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(compute_units >= 0); + pi::assertion(cuDeviceGetAttribute(&compute_units, + CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + device->get()) == CUDA_SUCCESS); + pi::assertion(compute_units >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(compute_units)); } @@ -862,20 +858,20 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t return_sizes[max_work_item_dimensions]; int max_x = 0, max_y = 0, max_z = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&max_x, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_x >= 0); + pi::assertion(cuDeviceGetAttribute(&max_x, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, + device->get()) == CUDA_SUCCESS); + pi::assertion(max_x >= 0); - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&max_y, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_y >= 0); + pi::assertion(cuDeviceGetAttribute(&max_y, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, + device->get()) == CUDA_SUCCESS); + pi::assertion(max_y >= 0); - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&max_z, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_z >= 0); + pi::assertion(cuDeviceGetAttribute(&max_z, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, + device->get()) == CUDA_SUCCESS); + pi::assertion(max_z >= 0); return_sizes[0] = size_t(max_x); return_sizes[1] = size_t(max_y); @@ -885,12 +881,12 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { int max_work_group_size = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&max_work_group_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_work_group_size >= 0); + pi::assertion(max_work_group_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, size_t(max_work_group_size)); @@ -939,10 +935,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { int clock_freq = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&clock_freq, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(clock_freq >= 0); + pi::assertion(cuDeviceGetAttribute(&clock_freq, + CU_DEVICE_ATTRIBUTE_CLOCK_RATE, + device->get()) == CUDA_SUCCESS); + pi::assertion(clock_freq >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(clock_freq) / 1000u); } @@ -958,8 +954,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, // CL_DEVICE_TYPE_CUSTOM. size_t global = 0; - cl::sycl::detail::pi::assertion(cuDeviceTotalMem(&global, device->get()) == - CUDA_SUCCESS); + pi::assertion(cuDeviceTotalMem(&global, device->get()) == CUDA_SUCCESS); auto quarter_global = static_cast(global / 4u); @@ -988,17 +983,16 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int tex_height = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_height, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_height >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&surf_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_height >= 0); + pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1007,17 +1001,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1026,17 +1018,16 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int tex_height = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_height, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_height >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&surf_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_height >= 0); + pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1045,17 +1036,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1064,17 +1053,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { // Take the smaller of maximum surface and maximum texture depth. int tex_depth = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_depth, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_depth >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_depth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_depth >= 0); int surf_depth = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_depth, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_depth >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_depth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_depth >= 0); int min = std::min(tex_depth, surf_depth); @@ -1083,17 +1070,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1117,10 +1102,9 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { int mem_base_addr_align = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&mem_base_addr_align, - CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&mem_base_addr_align, + CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, + device->get()) == CUDA_SUCCESS); // Multiply by 8 as clGetDeviceInfo returns this value in bits mem_base_addr_align *= 8; return getInfo(param_value_size, param_value, param_value_size_ret, @@ -1155,10 +1139,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { int cache_size = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&cache_size, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(cache_size >= 0); + pi::assertion(cuDeviceGetAttribute(&cache_size, + CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, + device->get()) == CUDA_SUCCESS); + pi::assertion(cache_size >= 0); // The L2 cache is global to the GPU. return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(cache_size)); @@ -1166,18 +1150,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: { size_t bytes = 0; // Runtime API has easy access to this value, driver API info is scarse. - cl::sycl::detail::pi::assertion(cuDeviceTotalMem(&bytes, device->get()) == - CUDA_SUCCESS); + pi::assertion(cuDeviceTotalMem(&bytes, device->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64{bytes}); } case PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: { int constant_memory = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&constant_memory, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(constant_memory >= 0); + pi::assertion(constant_memory >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(constant_memory)); @@ -1197,32 +1180,31 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, // CUDA has its own definition of "local memory", which maps to OpenCL's // "private memory". int local_mem_size = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&local_mem_size, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(local_mem_size >= 0); + pi::assertion(local_mem_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(local_mem_size)); } case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { int ecc_enabled = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&ecc_enabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&ecc_enabled, + CU_DEVICE_ATTRIBUTE_ECC_ENABLED, + device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); + pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); auto result = static_cast(ecc_enabled); return getInfo(param_value_size, param_value, param_value_size_ret, result); } case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: { int is_integrated = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&is_integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&is_integrated, + CU_DEVICE_ATTRIBUTE_INTEGRATED, + device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion((is_integrated == 0) | - (is_integrated == 1)); + pi::assertion((is_integrated == 0) | (is_integrated == 1)); auto result = static_cast(is_integrated); return getInfo(param_value_size, param_value, param_value_size_ret, result); } @@ -1274,9 +1256,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_NAME: { static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u; char name[MAX_DEVICE_NAME_LENGTH]; - cl::sycl::detail::pi::assertion( - cuDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, device->get()) == - CUDA_SUCCESS); + pi::assertion(cuDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, + device->get()) == CUDA_SUCCESS); return getInfoArray(strlen(name) + 1, param_value_size, param_value, param_value_size_ret, name); } @@ -1453,9 +1434,9 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Device info request not implemented"); + pi::die("Device info request not implemented"); return {}; } @@ -1483,8 +1464,7 @@ pi_result cuda_piextDeviceGetNativeHandle(pi_device device, pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform platform, pi_device *device) { - cl::sycl::detail::pi::die( - "Creation of PI device from native handle not implemented"); + pi::die("Creation of PI device from native handle not implemented"); return {}; } @@ -1646,8 +1626,7 @@ pi_result cuda_piextContextGetNativeHandle(pi_context context, /// \return TBD pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context *context) { - cl::sycl::detail::pi::die( - "Creation of PI context from native handle not implemented"); + pi::die("Creation of PI context from native handle not implemented"); return {}; } @@ -1783,8 +1762,7 @@ pi_result cuda_piMemRelease(pi_mem memObj) { // error for which it is unclear if the function that reported it succeeded // or not. Either way, the state of the program is compromised and likely // unrecoverable. - cl::sycl::detail::pi::die( - "Unrecoverable program state reached in cuda_piMemRelease"); + pi::die("Unrecoverable program state reached in cuda_piMemRelease"); } return PI_SUCCESS; @@ -1862,7 +1840,7 @@ pi_result cuda_piMemGetInfo(pi_mem memObj, cl_mem_info queriedInfo, size_t expectedQuerySize, void *queryOutput, size_t *writtenQuerySize) { - cl::sycl::detail::pi::die("cuda_piMemGetInfo not implemented"); + pi::die("cuda_piMemGetInfo not implemented"); } /// Gets the native CUDA handle of a PI mem object @@ -1887,8 +1865,7 @@ pi_result cuda_piextMemGetNativeHandle(pi_mem mem, /// \return TBD pi_result cuda_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_mem *mem) { - cl::sycl::detail::pi::die( - "Creation of PI mem from native handle not implemented"); + pi::die("Creation of PI mem from native handle not implemented"); return {}; } @@ -1963,9 +1940,9 @@ pi_result cuda_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, command_queue->properties_); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Queue info request not implemented"); + pi::die("Queue info request not implemented"); return {}; } @@ -2049,8 +2026,7 @@ pi_result cuda_piextQueueGetNativeHandle(pi_queue queue, pi_result cuda_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, pi_queue *queue) { - cl::sycl::detail::pi::die( - "Creation of PI queue from native handle not implemented"); + pi::die("Creation of PI queue from native handle not implemented"); return {}; } @@ -2256,9 +2232,8 @@ pi_result cuda_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, arrayDesc.Format != CU_AD_FORMAT_SIGNED_INT32 && arrayDesc.Format != CU_AD_FORMAT_HALF && arrayDesc.Format != CU_AD_FORMAT_FLOAT) { - cl::sycl::detail::pi::die( - "PI CUDA kernels only support images with channel types int32, " - "uint32, float, and half."); + pi::die("PI CUDA kernels only support images with channel types int32, " + "uint32, float, and half."); } CUsurfObject cuSurf = arg_mem->mem_.surface_mem_.get_surface(); kernel->set_kernel_arg(arg_index, sizeof(cuSurf), (void *)&cuSurf); @@ -2419,7 +2394,7 @@ pi_result cuda_piEnqueueNativeKernel( pi_uint32 num_mem_objects, const pi_mem *mem_list, const void **args_mem_loc, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - cl::sycl::detail::pi::die("Not implemented in CUDA backend"); + pi::die("Not implemented in CUDA backend"); return {}; } @@ -2438,8 +2413,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, // TODO: check SYCL CTS and spec. May also have to support BGRA if (image_format->image_channel_order != pi_image_channel_order::PI_IMAGE_CHANNEL_ORDER_RGBA) { - cl::sycl::detail::pi::die( - "cuda_piMemImageCreate only supports RGBA channel order"); + pi::die("cuda_piMemImageCreate only supports RGBA channel order"); } // We have to use cuArray3DCreate, which has some caveats. The height and @@ -2499,8 +2473,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, pixel_type_size_bytes = 4; break; default: - cl::sycl::detail::pi::die( - "cuda_piMemImageCreate given unsupported image_channel_data_type"); + pi::die("cuda_piMemImageCreate given unsupported image_channel_data_type"); } // When a dimension isn't used image_desc has the size set to 1 @@ -2583,7 +2556,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, pi_result cuda_piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - cl::sycl::detail::pi::die("cuda_piMemImageGetInfo not implemented"); + pi::die("cuda_piMemImageGetInfo not implemented"); return {}; } @@ -2601,7 +2574,7 @@ pi_result cuda_piclProgramCreateWithSource(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, pi_program *program) { - cl::sycl::detail::pi::die("cuda_piclProgramCreateWithSource not implemented"); + pi::die("cuda_piclProgramCreateWithSource not implemented"); return {}; } @@ -2636,7 +2609,7 @@ pi_result cuda_piProgramBuild(pi_program program, pi_uint32 num_devices, /// \TODO Not implemented pi_result cuda_piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program) { - cl::sycl::detail::pi::die("cuda_piProgramCreate not implemented"); + pi::die("cuda_piProgramCreate not implemented"); return {}; } @@ -2708,9 +2681,9 @@ pi_result cuda_piProgramGetInfo(pi_program program, pi_program_info param_name, getKernelNames(program).c_str()); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program info request not implemented"); + pi::die("Program info request not implemented"); return {}; } @@ -2825,9 +2798,9 @@ pi_result cuda_piProgramGetBuildInfo(pi_program program, pi_device device, return getInfoArray(program->MAX_LOG_SIZE, param_value_size, param_value, param_value_size_ret, program->infoLog_); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program Build info request not implemented"); + pi::die("Program Build info request not implemented"); return {}; } @@ -2894,8 +2867,7 @@ pi_result cuda_piextProgramGetNativeHandle(pi_program program, pi_result cuda_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, pi_program *program) { - cl::sycl::detail::pi::die( - "Creation of PI program from native handle not implemented"); + pi::die("Creation of PI program from native handle not implemented"); return {}; } @@ -2927,7 +2899,7 @@ pi_result cuda_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, ""); } default: { - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } } } @@ -2947,10 +2919,9 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, switch (param_name) { case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { int max_threads = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&max_threads, - CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, - kernel->get()) == CUDA_SUCCESS); + pi::assertion(cuFuncGetAttribute(&max_threads, + CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, + kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, size_t(max_threads)); } @@ -2968,32 +2939,32 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { // OpenCL LOCAL == CUDA SHARED int bytes = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, - kernel->get()) == CUDA_SUCCESS); + pi::assertion(cuFuncGetAttribute(&bytes, + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, + kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { // Work groups should be multiples of the warp size int warpSize = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&warpSize, + CU_DEVICE_ATTRIBUTE_WARP_SIZE, + device->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, static_cast(warpSize)); } case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { // OpenCL PRIVATE == CUDA LOCAL int bytes = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, - kernel->get()) == CUDA_SUCCESS); + pi::assertion(cuFuncGetAttribute(&bytes, + CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, + kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } } @@ -3006,7 +2977,7 @@ pi_result cuda_piKernelGetSubGroupInfo( pi_kernel kernel, pi_device device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - cl::sycl::detail::pi::die("cuda_piKernelGetSubGroupInfo not implemented"); + pi::die("cuda_piKernelGetSubGroupInfo not implemented"); return {}; } @@ -3055,7 +3026,7 @@ pi_result cuda_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, // Events // pi_result cuda_piEventCreate(pi_context context, pi_event *event) { - cl::sycl::detail::pi::die("PI Event Create not implemented in CUDA backend"); + pi::die("PI Event Create not implemented in CUDA backend"); } pi_result cuda_piEventGetInfo(pi_event event, pi_event_info param_name, @@ -3081,7 +3052,7 @@ pi_result cuda_piEventGetInfo(pi_event event, pi_event_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, event->get_context()); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } return PI_INVALID_EVENT; @@ -3114,9 +3085,9 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, return getInfo(param_value_size, param_value, param_value_size_ret, event->get_end_time()); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Event Profiling info request not implemented"); + pi::die("Event Profiling info request not implemented"); return {}; } @@ -3124,13 +3095,13 @@ pi_result cuda_piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, pfn_notify notify, void *user_data) { - cl::sycl::detail::pi::die("Event Callback not implemented in CUDA backend"); + pi::die("Event Callback not implemented in CUDA backend"); return PI_SUCCESS; } pi_result cuda_piEventSetStatus(pi_event event, pi_int32 execution_status) { - cl::sycl::detail::pi::die("Event Set Status not implemented in CUDA backend"); + pi::die("Event Set Status not implemented in CUDA backend"); return PI_INVALID_VALUE; } @@ -3139,9 +3110,8 @@ pi_result cuda_piEventRetain(pi_event event) { const auto refCount = event->increment_reference_count(); - cl::sycl::detail::pi::assertion( - refCount != 0, - "Reference count overflow detected in cuda_piEventRetain."); + pi::assertion(refCount != 0, + "Reference count overflow detected in cuda_piEventRetain."); return PI_SUCCESS; } @@ -3151,9 +3121,8 @@ pi_result cuda_piEventRelease(pi_event event) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - cl::sycl::detail::pi::assertion( - event->get_reference_count() != 0, - "Reference count overflow detected in cuda_piEventRelease."); + pi::assertion(event->get_reference_count() != 0, + "Reference count overflow detected in cuda_piEventRelease."); // decrement ref count. If it is 0, delete the event. if (event->decrement_reference_count() == 0) { @@ -3233,8 +3202,7 @@ pi_result cuda_piextEventGetNativeHandle(pi_event event, /// \return TBD pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_event *event) { - cl::sycl::detail::pi::die( - "Creation of PI event from native handle not implemented"); + pi::die("Creation of PI event from native handle not implemented"); return {}; } @@ -3334,7 +3302,7 @@ pi_result cuda_piSamplerGetInfo(pi_sampler sampler, cl_sampler_info param_name, addressing_prop); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } return {}; } @@ -3361,9 +3329,8 @@ pi_result cuda_piSamplerRelease(pi_sampler sampler) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - cl::sycl::detail::pi::assertion( - sampler->get_reference_count() != 0, - "Reference count overflow detected in cuda_piSamplerRelease."); + pi::assertion(sampler->get_reference_count() != 0, + "Reference count overflow detected in cuda_piSamplerRelease."); // decrement ref count. If it is 0, delete the sampler. if (sampler->decrement_reference_count() == 0) { @@ -3742,7 +3709,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR array_desc) { case CU_AD_FORMAT_FLOAT: return 4; } - cl::sycl::detail::pi::die("Invalid iamge format."); + pi::die("Invalid image format."); return 0; } @@ -4025,7 +3992,7 @@ pi_result cuda_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - cl::sycl::detail::pi::die("cuda_piEnqueueMemImageFill not implemented"); + pi::die("cuda_piEnqueueMemImageFill not implemented"); return {}; } @@ -4218,7 +4185,7 @@ pi_result cuda_piextUSMFree(pi_context context, void *ptr) { unsigned int type; result = PI_CHECK_ERROR(cuPointerGetAttribute( &type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)ptr)); - assert(type == CU_MEMORYTYPE_DEVICE or type == CU_MEMORYTYPE_HOST); + assert((type == CU_MEMORYTYPE_DEVICE) || (type == CU_MEMORYTYPE_HOST)); if (type == CU_MEMORYTYPE_DEVICE) { result = PI_CHECK_ERROR(cuMemFree((CUdeviceptr)ptr)); } @@ -4397,7 +4364,7 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, } result = PI_CHECK_ERROR(cuPointerGetAttribute( &value, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)ptr)); - assert(value == CU_MEMORYTYPE_DEVICE or value == CU_MEMORYTYPE_HOST); + assert((value == CU_MEMORYTYPE_DEVICE) || (value == CU_MEMORYTYPE_HOST)); if (value == CU_MEMORYTYPE_DEVICE) { // pointer to device memory return getInfo(param_value_size, param_value, param_value_size_ret, @@ -4409,7 +4376,7 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, PI_MEM_TYPE_HOST); } // should never get here - __builtin_unreachable(); + PI_BUILTIN_UNREACHABLE(); return getInfo(param_value_size, param_value, param_value_size_ret, PI_MEM_TYPE_UNKNOWN); } diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/piapi/plugins/cuda/pi_cuda.hpp similarity index 99% rename from sycl/plugins/cuda/pi_cuda.hpp rename to sycl/piapi/plugins/cuda/pi_cuda.hpp index 15de78d44bff2..60e1717bc2520 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/piapi/plugins/cuda/pi_cuda.hpp @@ -18,19 +18,28 @@ #ifndef PI_CUDA_HPP #define PI_CUDA_HPP -#include "CL/sycl/detail/pi.h" +#include + #include #include #include #include #include +#include #include +#include +#include #include +#include #include #include #include -#include -#include + +#if defined(__GNUC__) && !defined(__llvm__) && (__GNUC__ < 7) +namespace std { +using atomic_uint32_t = std::atomic; +} +#endif // GCC pre 7 extern "C" { diff --git a/sycl/piapi/plugins/level_zero/CMakeLists.txt b/sycl/piapi/plugins/level_zero/CMakeLists.txt new file mode 100755 index 0000000000000..e2afde38d296d --- /dev/null +++ b/sycl/piapi/plugins/level_zero/CMakeLists.txt @@ -0,0 +1,109 @@ +# PI Level Zero plugin library + +set(LEVEL_ZERO_LOADER + "${pi_library_output_dir}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader") +if(MSVC) + set(LEVEL_ZERO_LOADER "${LEVEL_ZERO_LOADER}${CMAKE_STATIC_LIBRARY_SUFFIX}") +else() + set(LEVEL_ZERO_LOADER "${LEVEL_ZERO_LOADER}${CMAKE_SHARED_LIBRARY_SUFFIX}") +endif() + +if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) + message(STATUS "Download Level Zero loader and headers from github.com") + if (CMAKE_C_COMPILER) + list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) + endif() + if (CMAKE_CXX_COMPILER) + list(APPEND AUX_CMAKE_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) + endif() + + set(level_zero_loader_binary_dir "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build") + file(MAKE_DIRECTORY "${level_zero_loader_binary_dir}") + + set(level_zero_loader_install_dir "${pi_library_base_dir}") + + set(LEVEL_ZERO_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/Level0/level_zero_loader") + if (NOT DEFINED SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE) + set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE}) + endif() + + ExternalProject_Add(level-zero-loader + GIT_REPOSITORY https://github.com/oneapi-src/level-zero.git + GIT_TAG v1.0 + UPDATE_DISCONNECTED ${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE} + SOURCE_DIR ${LEVEL_ZERO_LOADER_SOURCE_DIR} + BINARY_DIR "${level_zero_loader_binary_dir}" + INSTALL_DIR "${level_zero_loader_install_dir}" + CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} + -DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS} + -DCMAKE_INSTALL_PREFIX= + -DCMAKE_INSTALL_LIBDIR:PATH=lib${PI_LIBDIR_SUFFIX} + LOG_DOWNLOAD 1 + LOG_UPDATE 1 + LOG_CONFIGURE 1 + LOG_BUILD 1 + LOG_INSTALL 1 + ${AUX_CMAKE_FLAGS} + STEP_TARGETS configure,build,install + BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER} + ) + set(LEVEL_ZERO_INCLUDE_DIR "${level_zero_loader_install_dir}/include") + file(MAKE_DIRECTORY "${LEVEL_ZERO_INCLUDE_DIR}") +else() + include_directories("${LEVEL_ZERO_INCLUDE_DIR}") + file(GLOB LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}*") + file(COPY ${LEVEL_ZERO_LIBRARY_SRC} DESTINATION ${pi_library_output_dir}) + add_custom_target(level-zero-loader DEPENDS ${LEVEL_ZERO_LIBRARY} COMMENT "Copying Level Zero Loader ...") +endif() + +add_library(level_zero_lib INTERFACE IMPORTED GLOBAL) +set_target_properties(level_zero_lib PROPERTIES + INTERFACE_LINK_LIBRARIES ${LEVEL_ZERO_LOADER} + INTERFACE_INCLUDE_DIRECTORIES "${LEVEL_ZERO_INCLUDE_DIR}" +) +add_dependencies(level_zero_lib level-zero-loader) + +add_library(pi_level_zero SHARED + "${pi_include_dir}/pi/pi.h" + "pi_level_zero.cpp" + "pi_level_zero.hpp" + "usm_allocator.cpp" + "usm_allocator.hpp" +) + +set_target_properties(pi_level_zero PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" + LIBRARY_OUTPUT_DIRECTORY "${pi_library_output_dir}" + RUNTIME_OUTPUT_DIRECTORY "${pi_binary_output_dir}" +) + +target_link_libraries(pi_level_zero PUBLIC piapi) +target_link_libraries(pi_level_zero PRIVATE + level_zero_lib + pi_export_library +) +if (UNIX) + target_link_libraries(pi_level_zero PRIVATE pthread) +endif() + +################################################################################ +## install + +install(TARGETS pi_level_zero + EXPORT piapiTargets + LIBRARY DESTINATION "lib${PI_LIBDIR_SUFFIX}" COMPONENT pi_level_zero + RUNTIME DESTINATION "bin" COMPONENT pi_level_zero) +export(TARGETS pi_level_zero APPEND + FILE "${PROJECT_BINARY_DIR}/piapiTargets.cmake" +) +install(FILES "pi_level_zero.hpp" DESTINATION include/pi COMPONENT headers) +install(DIRECTORY "${LEVEL_ZERO_INCLUDE_DIR}/" + DESTINATION "include" + COMPONENT headers +) +install(DIRECTORY "${pi_library_output_dir}" + DESTINATION . + COMPONENT pi_level_zero + FILES_MATCHING PATTERN "${CMAKE_STATIC_LIBRARY_PREFIX}ze*" +) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/piapi/plugins/level_zero/pi_level_zero.cpp similarity index 99% rename from sycl/plugins/level_zero/pi_level_zero.cpp rename to sycl/piapi/plugins/level_zero/pi_level_zero.cpp index d8b35467d545f..d0d8a21ea9f57 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/piapi/plugins/level_zero/pi_level_zero.cpp @@ -2828,7 +2828,7 @@ zeModuleDynamicLinkMock(uint32_t numModules, ze_module_handle_t *phModules, // is only a single module that is "linked" to itself. There is nothing to // do in this degenerate case. if (numModules > 1) { - die("piProgramLink: Program Linking is not supported yet in Level0"); + die("piProgramLink: Program Linking is not supported yet in Level Zero"); } // The mock does not support the link log. @@ -5107,7 +5107,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { #define _PI_API(api) \ (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#include +#include return PI_SUCCESS; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/piapi/plugins/level_zero/pi_level_zero.hpp similarity index 99% rename from sycl/plugins/level_zero/pi_level_zero.hpp rename to sycl/piapi/plugins/level_zero/pi_level_zero.hpp index c552df2556fb3..93d0659ead407 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/piapi/plugins/level_zero/pi_level_zero.hpp @@ -18,7 +18,6 @@ #ifndef PI_LEVEL_ZERO_HPP #define PI_LEVEL_ZERO_HPP -#include #include #include #include @@ -27,13 +26,14 @@ #include #include #include +#include #include #include -#include - #include "usm_allocator.hpp" +#include + template To pi_cast(From Value) { // TODO: see if more sanity checks are possible. assert(sizeof(From) == sizeof(To)); diff --git a/sycl/plugins/level_zero/usm_allocator.cpp b/sycl/piapi/plugins/level_zero/usm_allocator.cpp similarity index 100% rename from sycl/plugins/level_zero/usm_allocator.cpp rename to sycl/piapi/plugins/level_zero/usm_allocator.cpp diff --git a/sycl/plugins/level_zero/usm_allocator.hpp b/sycl/piapi/plugins/level_zero/usm_allocator.hpp similarity index 100% rename from sycl/plugins/level_zero/usm_allocator.hpp rename to sycl/piapi/plugins/level_zero/usm_allocator.hpp diff --git a/sycl/piapi/plugins/opencl/CMakeLists.txt b/sycl/piapi/plugins/opencl/CMakeLists.txt new file mode 100644 index 0000000000000..584f0aa39225a --- /dev/null +++ b/sycl/piapi/plugins/opencl/CMakeLists.txt @@ -0,0 +1,36 @@ +#TODO: +#1. Figure out why CMP0057 has to be set. Should have been taken care of earlier in the build +#2. Use AddLLVM to modify the build and access config options +#cmake_policy(SET CMP0057 NEW) +#include(AddLLVM) + +find_package(OpenCL REQUIRED) + +# Plugin for OpenCL +# Create Shared library for libpi_opencl.so. +#TODO: remove dependency on pi.hpp in sycl project. +#TODO: Currently, the pi.hpp header is common between sycl and plugin library sources. +#This can be changed by copying the pi.hpp file in the plugins project. + +add_library(pi_opencl SHARED + "${pi_include_dir}/pi/pi.h" + "pi_opencl.cpp" +) +set_target_properties(pi_opencl PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" + LIBRARY_OUTPUT_DIRECTORY "${pi_library_output_dir}" + RUNTIME_OUTPUT_DIRECTORY "${pi_binary_output_dir}" +) + +target_include_directories(pi_opencl PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") +target_link_libraries(pi_opencl PUBLIC piapi OpenCL::OpenCL) +target_link_libraries(pi_opencl PRIVATE pi_export_library) + +install(TARGETS pi_opencl + EXPORT piapiTargets + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_opencl + RUNTIME DESTINATION "bin" COMPONENT pi_opencl +) +export(TARGETS pi_opencl APPEND + FILE "${PROJECT_BINARY_DIR}/piapiTargets.cmake" +) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/piapi/plugins/opencl/pi_opencl.cpp similarity index 99% rename from sycl/plugins/opencl/pi_opencl.cpp rename to sycl/piapi/plugins/opencl/pi_opencl.cpp index 025437b8ba8a9..a5198470b7c59 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/piapi/plugins/opencl/pi_opencl.cpp @@ -14,8 +14,10 @@ /// /// \ingroup sycl_pi_ocl -#include -#include +#include +#include +#include +#include #include #include diff --git a/sycl/piapi/plugins/test.cpp b/sycl/piapi/plugins/test.cpp new file mode 100644 index 0000000000000..0a8c1cf371276 --- /dev/null +++ b/sycl/piapi/plugins/test.cpp @@ -0,0 +1,74 @@ +//==---------- test.cpp - Test piapi library -------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include + +// Configuration hooks +namespace pi { +namespace config { +TraceLevel trace_level_mask() { return {TraceLevel::PI_TRACE_ALL}; } +pi::backend *backend() { return nullptr; } +pi::device_filter_list *device_filter_list() { return nullptr; } +} // namespace config +} // namespace pi + +std::string platform_info_string(pi::plugin &p, pi_platform plt, + pi_platform_info param) { + std::size_t paramSize = 0; + p.call(plt, param, 0, nullptr, ¶mSize); + + if (paramSize == 0) { + std::cout << "Empty platform name" << std::endl; + return ""; + } + + std::vector platformNameBuffer; + platformNameBuffer.resize(paramSize); + + p.call(plt, param, paramSize, + platformNameBuffer.data(), nullptr); + + return platformNameBuffer.data(); +} + +int main() { + auto plugins = pi::initialize(); + + std::cout << "Num plugins: " << plugins.size() << std::endl; + + for (auto &p : plugins) { + std::cout << "=== plugin" << std::endl; + + pi_uint32 numPlatforms = 0; + p.call(0, nullptr, &numPlatforms); + + std::cout << "Num platforms: " << numPlatforms << std::endl; + + if (numPlatforms == 0) { + continue; + } + + std::vector platforms; + platforms.resize(numPlatforms); + p.call(numPlatforms, platforms.data(), nullptr); + + for (auto &plt : platforms) { + std::cout << "Platform name: " + << platform_info_string(p, plt, PI_PLATFORM_INFO_NAME) + << std::endl; + + std::cout << "Platform version: " + << platform_info_string(p, plt, PI_PLATFORM_INFO_VERSION) + << std::endl; + } + } + + return 0; +} diff --git a/sycl/source/detail/pi.cpp b/sycl/piapi/src/pi.cpp similarity index 87% rename from sycl/source/detail/pi.cpp rename to sycl/piapi/src/pi.cpp index 0ff4081e96ade..baf8de9b8dc80 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/piapi/src/pi.cpp @@ -11,41 +11,30 @@ /// /// \ingroup sycl_pi -#include "context_impl.hpp" -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include #include +#include #include #include #include +#include #include -#include +#include #include +#include +#include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION // Include the headers necessary for emitting // traces using the trace framework #include "xpti_trace_framework.h" -#endif -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -// Global (to the SYCL runtime) graph handle that all command groups are a -// child of -/// Event to be used by graph related activities -xpti_td *GSYCLGraphEvent = nullptr; -/// Event to be used by PI layer related activities -xpti_td *GPICallEvent = nullptr; /// Constants being used as placeholder until one is able to reliably get the /// version of the SYCL runtime constexpr uint32_t GMajVer = 1; @@ -55,9 +44,29 @@ constexpr const char *GVerStr = "sycl 1.0"; namespace pi { -static void initializePlugins(vector_class *Plugins); +#ifdef XPTI_ENABLE_INSTRUMENTATION + +// We define a sycl stream name and this will be used by the instrumentation +// framework +extern const char *SYCL_STREAM_NAME; + +// Global (to the SYCL runtime) graph handle that all command groups are a +// child of +/// Event to be used by graph related activities +extern xpti_td *GSYCLGraphEvent; + +/// Event to be used by PI layer related activities +xpti_td *GPICallEvent = nullptr; + +// Stream name being used for traces generated from the SYCL plugin layer +constexpr const char *SYCL_PICALL_STREAM_NAME = "sycl.pi"; + +#endif // XPTI_ENABLE_INSTRUMENTATION + +static void initializePlugins(std::vector *Plugins); bool XPTIInitDone = false; +std::shared_ptr GlobalPlugin; // Implementation of the SYCL PI API call tracing methods that use XPTI // framework to emit these traces that will be used by tools. @@ -120,16 +129,6 @@ void emitFunctionEndTrace(uint64_t CorrelationID, const char *FName) { #endif // XPTI_ENABLE_INSTRUMENTATION } -void contextSetExtendedDeleter(const cl::sycl::context &context, - pi_context_extended_deleter func, - void *user_data) { - auto impl = getSyclObjImpl(context); - auto contextHandle = reinterpret_cast(impl->getHandleRef()); - auto plugin = impl->getPlugin(); - plugin.call_nocheck( - contextHandle, func, user_data); -} - std::string platformInfoToString(pi_platform_info info) { switch (info) { case PI_PLATFORM_INFO_PROFILE: @@ -144,7 +143,7 @@ std::string platformInfoToString(pi_platform_info info) { return "PI_PLATFORM_INFO_EXTENSIONS"; default: die("Unknown pi_platform_info value passed to " - "cl::sycl::detail::pi::platformInfoToString"); + "pi::platformInfoToString"); } } @@ -206,23 +205,18 @@ std::string memFlagsToString(pi_mem_flags Flags) { return Sstream.str(); } -// GlobalPlugin is a global Plugin used with Interoperability constructors that -// use OpenCL objects to construct SYCL class objects. -std::shared_ptr GlobalPlugin; - // Find the plugin at the appropriate location and return the location. -bool findPlugins(vector_class> &PluginNames) { +bool findPlugins(std::vector> &PluginNames) { // TODO: Based on final design discussions, change the location where the // plugin must be searched; how to identify the plugins etc. Currently the // search is done for libpi_opencl.so/pi_opencl.dll file in LD_LIBRARY_PATH // env only. // - device_filter_list *FilterList = SYCLConfig::get(); + device_filter_list *FilterList = config::device_filter_list(); if (!FilterList) { - PluginNames.emplace_back(__SYCL_OPENCL_PLUGIN_NAME, backend::opencl); - PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, - backend::level_zero); - PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); + PluginNames.emplace_back(PI_OPENCL_PLUGIN_NAME, backend::opencl); + PluginNames.emplace_back(PI_LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); + PluginNames.emplace_back(PI_CUDA_PLUGIN_NAME, backend::cuda); } else { std::vector Filters = FilterList->get(); bool OpenCLFound = false; @@ -232,16 +226,16 @@ bool findPlugins(vector_class> &PluginNames) { backend Backend = Filter.Backend; if (!OpenCLFound && (Backend == backend::opencl || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_OPENCL_PLUGIN_NAME, backend::opencl); + PluginNames.emplace_back(PI_OPENCL_PLUGIN_NAME, backend::opencl); OpenCLFound = true; } else if (!LevelZeroFound && (Backend == backend::level_zero || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, + PluginNames.emplace_back(PI_LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); LevelZeroFound = true; } else if (!CudaFound && (Backend == backend::cuda || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); + PluginNames.emplace_back(PI_CUDA_PLUGIN_NAME, backend::cuda); CudaFound = true; } } @@ -280,23 +274,22 @@ bool bindPlugin(void *Library, PiPlugin *PluginInformation) { } bool trace(TraceLevel Level) { - auto TraceLevelMask = SYCLConfig::get(); + auto TraceLevelMask = config::trace_level_mask(); return (TraceLevelMask & Level) == Level; } // Initializes all available Plugins. -const vector_class &initialize() { - static std::once_flag PluginsInitDone; - - std::call_once(PluginsInitDone, []() { - initializePlugins(&GlobalHandler::instance().getPlugins()); - }); - - return GlobalHandler::instance().getPlugins(); +const std::vector &initialize() { + static std::vector *Plugins = []() { + auto PluginsPtr = new std::vector; + initializePlugins(PluginsPtr); + return PluginsPtr; + }(); + return *Plugins; } -static void initializePlugins(vector_class *Plugins) { - vector_class> PluginNames; +static void initializePlugins(std::vector *Plugins) { + std::vector> PluginNames; findPlugins(PluginNames); if (PluginNames.empty() && trace(PI_TRACE_ALL)) @@ -327,7 +320,7 @@ static void initializePlugins(vector_class *Plugins) { } continue; } - backend *BE = SYCLConfig::get(); + backend *BE = config::backend(); // Use OpenCL as the default interoperability plugin. // This will go away when we make backend interoperability selection // explicit in SYCL-2020. @@ -406,15 +399,15 @@ template const plugin &getPlugin() { if (Plugin) return *Plugin; - const vector_class &Plugins = pi::initialize(); + const std::vector &Plugins = pi::initialize(); for (const auto &P : Plugins) if (P.getBackend() == BE) { Plugin = &P; return *Plugin; } - throw runtime_error("pi::getPlugin couldn't find plugin", - PI_INVALID_OPERATION); + throw std::runtime_error("pi::getPlugin couldn't find plugin (" + + std::to_string(PI_INVALID_OPERATION) + ")"); } template const plugin &getPlugin(); @@ -524,7 +517,7 @@ pi_uint32 DeviceBinaryProperty::asUint32() const { assert(Prop->Type == PI_PROPERTY_TYPE_UINT32 && "property type mismatch"); // if type fits into the ValSize - it is used to store the property value assert(Prop->ValAddr == nullptr && "primitive types must be stored inline"); - return sycl::detail::pi::asUint32(&Prop->ValSize); + return pi::asUint32(&Prop->ValSize); } ByteArray DeviceBinaryProperty::asByteArray() const { @@ -558,16 +551,16 @@ void DeviceBinaryImage::PropertyRange::init(pi_device_binary Bin, End = Begin ? PS->PropertiesEnd : nullptr; } -RT::PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, +pi::PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize) { struct { - RT::PiDeviceBinaryType Fmt; + pi::PiDeviceBinaryType Fmt; const uint32_t Magic; } Fmts[] = {{PI_DEVICE_BINARY_TYPE_SPIRV, 0x07230203}, {PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE, 0xDEC04342}}; if (ImgSize >= sizeof(Fmts[0].Magic)) { - detail::remove_const_t Hdr = 0; + typename std::remove_const::type Hdr = 0; std::copy(ImgData, ImgData + sizeof(Hdr), reinterpret_cast(&Hdr)); for (const auto &Fmt : Fmts) { @@ -597,6 +590,3 @@ void DeviceBinaryImage::init(pi_device_binary Bin) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/piapi/src/plugin_pi_hooks.cpp b/sycl/piapi/src/plugin_pi_hooks.cpp new file mode 100644 index 0000000000000..d07a138cbf262 --- /dev/null +++ b/sycl/piapi/src/plugin_pi_hooks.cpp @@ -0,0 +1,24 @@ +//==---------- plugin_pi_hooks.cpp - PI library hooks ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +// We just need basic definitions for the plugins to link properly +// These symbols aren't actually used in plugins, +// any library linking against piapi directly +// still needs to define its own hooks + +namespace pi { +namespace config { + +TraceLevel trace_level_mask() { return {TraceLevel::PI_TRACE_ALL}; } +pi::backend *backend() { return nullptr; } +pi::device_filter_list *device_filter_list() { return nullptr; } + +} // namespace config +} // namespace pi diff --git a/sycl/source/detail/posix_pi.cpp b/sycl/piapi/src/posix_pi.cpp similarity index 81% rename from sycl/source/detail/posix_pi.cpp rename to sycl/piapi/src/posix_pi.cpp index db21e2fa01ced..dc1516e67e28a 100644 --- a/sycl/source/detail/posix_pi.cpp +++ b/sycl/piapi/src/posix_pi.cpp @@ -6,14 +6,9 @@ // //===----------------------------------------------------------------------===// -#include - #include #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { namespace pi { void *loadOsLibrary(const std::string &PluginPath) { @@ -27,6 +22,3 @@ void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/piapi/src/shim.cpp b/sycl/piapi/src/shim.cpp new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sycl/source/detail/windows_pi.cpp b/sycl/piapi/src/windows_pi.cpp similarity index 79% rename from sycl/source/detail/windows_pi.cpp rename to sycl/piapi/src/windows_pi.cpp index 3e7d87cc86efd..dc48247e19872 100644 --- a/sycl/source/detail/windows_pi.cpp +++ b/sycl/piapi/src/windows_pi.cpp @@ -6,15 +6,10 @@ // //===----------------------------------------------------------------------===// -#include - +#include #include #include -#include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { namespace pi { void *loadOsLibrary(const std::string &PluginPath) { @@ -26,6 +21,3 @@ void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/plugins/CMakeLists.txt b/sycl/plugins/CMakeLists.txt deleted file mode 100644 index 700e09d1a0c1d..0000000000000 --- a/sycl/plugins/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) - -if(SYCL_BUILD_PI_CUDA) - add_subdirectory(cuda) -endif() - -add_subdirectory(opencl) -add_subdirectory(level_zero) diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt deleted file mode 100644 index 1c230b795a10e..0000000000000 --- a/sycl/plugins/cuda/CMakeLists.txt +++ /dev/null @@ -1,51 +0,0 @@ -message(STATUS "Including the PI API CUDA backend.") - - # cannot rely on cmake support for CUDA; it assumes runtime API is being used. - # we only require the CUDA driver API to be used - # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library. - -find_package(CUDA 10.1 REQUIRED) - -# Make imported library global to use it within the project. -add_library(cudadrv SHARED IMPORTED GLOBAL) - -set_target_properties( - cudadrv PROPERTIES - IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} -) - -add_library(pi_cuda SHARED - "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "${sycl_inc_dir}/CL/sycl/detail/pi.hpp" - "pi_cuda.hpp" - "pi_cuda.cpp" -) - -add_dependencies(pi_cuda - ocl-headers -) - -add_dependencies(sycl-toolchain pi_cuda) - -set_target_properties(pi_cuda PROPERTIES LINKER_LANGUAGE CXX) - -target_include_directories(pi_cuda - PRIVATE - ${sycl_inc_dir} - PUBLIC - ${CUDA_INCLUDE_DIRS} -) - -target_link_libraries(pi_cuda PUBLIC OpenCL-Headers cudadrv) - -add_common_options(pi_cuda) - -install(TARGETS pi_cuda - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_cuda - RUNTIME DESTINATION "bin" COMPONENT pi_cuda -) - -# `sycl/source/CMakeLists.txt` adapted when SYCL_BUILD_PI_CUDA is defined: -# target_link_libraries(sycl PUBLIC pi_cuda) -# target_compile_definitions(sycl PUBLIC USE_PI_CUDA) diff --git a/sycl/plugins/ld-version-script.txt b/sycl/plugins/ld-version-script.txt deleted file mode 100644 index 1ad2c6d5f8390..0000000000000 --- a/sycl/plugins/ld-version-script.txt +++ /dev/null @@ -1,10 +0,0 @@ -{ - /* in CMakelists.txt, we pass -fvisibility=hidden compiler flag */ - /* This file is used to give exception of the hidden visibility */ - /* Export only pi* function symbols which are individually marked 'default' visibility */ - - global: pi*; - - /* all other symbols are local scope, meaning not exported */ - local: *; -}; diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt deleted file mode 100755 index b9127deb803f4..0000000000000 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ /dev/null @@ -1,116 +0,0 @@ -# PI Level Zero plugin library - -if(MSVC) - set(LEVEL_ZERO_LOADER - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}") -else() - set(LEVEL_ZERO_LOADER - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}") -endif() - -if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) - message(STATUS "Download Level Zero loader and headers from github.com") - if (CMAKE_C_COMPILER) - list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) - endif() - if (CMAKE_CXX_COMPILER) - list(APPEND AUX_CMAKE_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) - endif() - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build) - set(LEVEL_ZERO_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero/level_zero_loader") - if (NOT DEFINED SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE) - set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE}) - endif() - ExternalProject_Add(level-zero-loader - GIT_REPOSITORY https://github.com/oneapi-src/level-zero.git - GIT_TAG v1.0 - UPDATE_DISCONNECTED ${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE} - SOURCE_DIR ${LEVEL_ZERO_LOADER_SOURCE_DIR} - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build" - INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install" - CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} - -DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS} - -DCMAKE_INSTALL_PREFIX= - -DCMAKE_INSTALL_LIBDIR:PATH=lib${LLVM_LIBDIR_SUFFIX} - ${AUX_CMAKE_FLAGS} - LOG_DOWNLOAD 1 - LOG_UPDATE 1 - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 - STEP_TARGETS configure,build,install - DEPENDS ocl-headers - BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER} - ) - ExternalProject_Add_Step(level-zero-loader llvminstall - COMMAND ${CMAKE_COMMAND} -E copy_directory / ${LLVM_BINARY_DIR} - COMMENT "Installing level-zero-loader into the LLVM binary directory" - DEPENDEES install - ) - - install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install/" - DESTINATION "." - COMPONENT level-zero-loader - ) - - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS level-zero-loader) -else() - include_directories("${LEVEL_ZERO_INCLUDE_DIR}") - file(GLOB LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}*") - file(COPY ${LEVEL_ZERO_LIBRARY_SRC} DESTINATION ${LLVM_LIBRARY_OUTPUT_INTDIR}) - add_custom_target(level-zero-loader DEPENDS ${LEVEL_ZERO_LIBRARY} COMMENT "Copying Level Zero Loader ...") -endif() - -add_library (LevelZeroLoader-Headers INTERFACE) -add_library (LevelZeroLoader::Headers ALIAS LevelZeroLoader-Headers) -target_include_directories(LevelZeroLoader-Headers - INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" -) - -include_directories("${sycl_inc_dir}") -include_directories(${OPENCL_INCLUDE}) - -add_library(pi_level_zero SHARED - "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/usm_allocator.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/usm_allocator.hpp" -) - -if (MSVC) - # by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport) - # which are individually tagged for all pi* symbols in pi.h - target_compile_definitions(pi_level_zero PRIVATE __SYCL_BUILD_SYCL_DLL) -else() - # we set the visibility of all symbols 'hidden' by default. - # In pi.h file, we set exported symbols with visibility==default individually - target_compile_options(pi_level_zero PUBLIC -fvisibility=hidden) - - # This script file is used to allow exporting pi* symbols only. - # All other symbols are regarded as local (hidden) - set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/../ld-version-script.txt") - - # Filter symbols based on the scope defined in the script file, - # and export pi* function symbols in the library. - target_link_libraries( pi_level_zero - PRIVATE "-Wl,--version-script=${linker_script}" - ) -endif() - -if (TARGET level-zero-loader) - add_dependencies(pi_level_zero level-zero-loader) -endif() - add_dependencies(sycl-toolchain pi_level_zero) - - target_link_libraries(pi_level_zero PRIVATE "${LEVEL_ZERO_LOADER}") -if (UNIX) - target_link_libraries(pi_level_zero PRIVATE pthread) -endif() - -add_common_options(pi_level_zero) - -install(TARGETS pi_level_zero - LIBRARY DESTINATION "lib" COMPONENT pi_level_zero - RUNTIME DESTINATION "bin" COMPONENT pi_level_zero) diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt deleted file mode 100644 index 30674c1d5621b..0000000000000 --- a/sycl/plugins/opencl/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -#TODO: -#1. Figure out why CMP0057 has to be set. Should have been taken care of earlier in the build -#2. Use AddLLVM to modify the build and access config options -#cmake_policy(SET CMP0057 NEW) -#include(AddLLVM) - -# Plugin for OpenCL -# Create Shared library for libpi_opencl.so. -#TODO: remove dependency on pi.hpp in sycl project. -#TODO: Currently, the pi.hpp header is common between sycl and plugin library sources. -#This can be changed by copying the pi.hpp file in the plugins project. - -add_library(pi_opencl SHARED - "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "pi_opencl.cpp" - ) - -add_dependencies(pi_opencl - ocl-icd - ocl-headers -) - -add_dependencies(sycl-toolchain pi_opencl) - -set_target_properties(pi_opencl PROPERTIES LINKER_LANGUAGE CXX) - -#preprocessor definitions for compiling a target's sources. We do not need it for pi_opencl -target_include_directories(pi_opencl PRIVATE "${sycl_inc_dir}") - -#link pi_opencl with OpenCL headers and ICD Loader. -target_link_libraries( pi_opencl - PRIVATE OpenCL::Headers - PRIVATE ${OpenCL_LIBRARIES} -) -if (MSVC) - # by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport) - # which are individually tagged for all pi* symbols in pi.h - target_compile_definitions(pi_opencl PRIVATE __SYCL_BUILD_SYCL_DLL) -else() - # we set the visibility of all symbols 'hidden' by default. - # In pi.h file, we set exported symbols with visibility==default individually - target_compile_options(pi_opencl PUBLIC -fvisibility=hidden) - - # This script file is used to allow exporting pi* symbols only. - # All other symbols are regarded as local (hidden) - set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/../ld-version-script.txt") - - # Filter symbols based on the scope defined in the script file, - # and export pi* function symbols in the library. - target_link_libraries( pi_opencl - PRIVATE "-Wl,--version-script=${linker_script}" - ) -endif() - -add_common_options(pi_opencl) - -install(TARGETS pi_opencl - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_opencl - RUNTIME DESTINATION "bin" COMPONENT pi_opencl) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 890743b7c6c58..1f2039d354503 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -76,8 +76,11 @@ function(add_sycl_rt_library LIB_NAME) ${OpenCL_LIBRARIES} ${CMAKE_DL_LIBS} ${CMAKE_THREAD_LIBS_INIT} - PUBLIC - $<$:pi_cuda> + piapi::piapi + ) + target_link_libraries(${LIB_OBJ_NAME} + PRIVATE + piapi::piapi ) target_compile_definitions(${LIB_OBJ_NAME} @@ -103,7 +106,6 @@ set(SYCL_SOURCES "detail/builtins_integer.cpp" "detail/builtins_math.cpp" "detail/builtins_relational.cpp" - "detail/pi.cpp" "detail/common.cpp" "detail/config.cpp" "detail/context_impl.cpp" @@ -122,6 +124,7 @@ set(SYCL_SOURCES "detail/kernel_impl.cpp" "detail/kernel_program_cache.cpp" "detail/memory_manager.cpp" + "detail/pi_hooks.cpp" "detail/platform_impl.cpp" "detail/program_impl.cpp" "detail/program_manager/program_manager.cpp" @@ -159,8 +162,6 @@ set(SYCL_SOURCES "sampler.cpp" "stream.cpp" "spirv_ops.cpp" - "$<$:detail/windows_pi.cpp>" - "$<$,$>:detail/posix_pi.cpp>" ) if (MSVC) diff --git a/sycl/source/backend/level_zero.cpp b/sycl/source/backend/level_zero.cpp index 4f7467058bcc6..308f6387d9477 100644 --- a/sycl/source/backend/level_zero.cpp +++ b/sycl/source/backend/level_zero.cpp @@ -8,9 +8,9 @@ #include #include -#include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/backend/opencl.cpp b/sycl/source/backend/opencl.cpp index bd06563c0b8b8..aa0a070237689 100644 --- a/sycl/source/backend/opencl.cpp +++ b/sycl/source/backend/opencl.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index cf180ede28426..ddae9e6604feb 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -66,9 +67,9 @@ context::context(const vector_class &DeviceList, const auto &NonHostPlatform = NonHostDevice.get_platform().get(); if (std::any_of(DeviceList.begin(), DeviceList.end(), [&](const device &CurrentDevice) { - return (CurrentDevice.is_host() || - (CurrentDevice.get_platform().get() != - NonHostPlatform)); + return (CurrentDevice.is_host() || + (CurrentDevice.get_platform().get() != + NonHostPlatform)); })) throw invalid_parameter_error( "Can't add devices across platforms to a single context.", diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 8f54271e260f6..7246f80e2a0e2 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 0c06ba43bb758..99599c9a74cf4 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -6,9 +6,9 @@ // // ===--------------------------------------------------------------------=== // +#include #include -#include -#include +#include #include #include #include @@ -20,6 +20,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 967c4a6cffcbe..7fcebad5e855f 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -9,7 +9,7 @@ #pragma once #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index c1ea2a347c061..6244c74eae5fa 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/source/detail/device_filter.cpp b/sycl/source/detail/device_filter.cpp index e7a1a0b99d8f8..04cd936abab58 100644 --- a/sycl/source/detail/device_filter.cpp +++ b/sycl/source/detail/device_filter.cpp @@ -13,9 +13,9 @@ #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { +namespace pi { + +using namespace cl::sycl; device_filter::device_filter(const std::string &FilterString) { const std::array, 5> @@ -124,6 +124,4 @@ void device_filter_list::addFilter(device_filter &Filter) { FilterList.push_back(Filter); } -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) +} // namespace pi diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 0e1381f933964..979ce1510e771 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 4fc8be379bb0e..213f5965b6205 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -10,14 +10,14 @@ #include #include #include -#include +#include #include #include #include #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index ab846466161ca..a585c823b1aba 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -13,8 +13,8 @@ #include "error_handling.hpp" #include -#include -#include +#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/error_handling/error_handling.hpp b/sycl/source/detail/error_handling/error_handling.hpp index 06bfe4cec173c..00bb3e132bc48 100644 --- a/sycl/source/detail/error_handling/error_handling.hpp +++ b/sycl/source/detail/error_handling/error_handling.hpp @@ -9,8 +9,8 @@ #pragma once #include -#include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 0f67b3238f35c..ef85924af5575 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -21,13 +21,18 @@ #include "xpti_trace_framework.hpp" #include #include + +namespace pi { +extern xpti::trace_event_data_t *GSYCLGraphEvent; +} #endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { + #ifdef XPTI_ENABLE_INSTRUMENTATION -extern xpti::trace_event_data_t *GSYCLGraphEvent; +using pi::GSYCLGraphEvent; #endif // Threat all devices that don't support interoperability as host devices to diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 4b291159c8e4f..e8de16be21173 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -10,18 +10,21 @@ #include #include -#include +#include #include #include #include #include +namespace pi { +class plugin; +} + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { class context; namespace detail { -class plugin; class context_impl; using ContextImplPtr = std::shared_ptr; class queue_impl; @@ -120,7 +123,7 @@ class event_impl { /// \return the Plugin associated with the context of this event. /// Should be called when this is not a Host Event. - const plugin &getPlugin() const; + const pi::plugin &getPlugin() const; /// Associate event with the context. /// diff --git a/sycl/source/detail/event_info.hpp b/sycl/source/detail/event_info.hpp index 62d4ed01c2947..03cf4e3d71792 100644 --- a/sycl/source/detail/event_info.hpp +++ b/sycl/source/detail/event_info.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index be4b9e200fda8..4f85312ff3bb3 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -10,9 +10,9 @@ #include #include #include -#include #include #include +#include #ifdef WIN32 #include @@ -91,16 +91,6 @@ std::mutex &GlobalHandler::getFilterMutex() { return *MFilterMutex; } -std::vector &GlobalHandler::getPlugins() { - if (MPlugins) - return *MPlugins; - - const std::lock_guard Lock{MFieldsLock}; - if (!MPlugins) - MPlugins = std::make_unique>(); - - return *MPlugins; -} device_filter_list & GlobalHandler::getDeviceFilterList(const std::string &InitValue) { if (MDeviceFilterList) diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index ae923593b5808..7dc272c3713f7 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -13,6 +13,10 @@ #include +namespace pi { +class plugin; +} + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -20,7 +24,6 @@ class platform_impl; class Scheduler; class ProgramManager; class Sync; -class plugin; class device_filter_list; using PlatformImplPtr = std::shared_ptr; @@ -54,7 +57,6 @@ class GlobalHandler { std::vector &getPlatformCache(); std::mutex &getPlatformMapMutex(); std::mutex &getFilterMutex(); - std::vector &getPlugins(); device_filter_list &getDeviceFilterList(const std::string &InitValue); private: @@ -72,7 +74,7 @@ class GlobalHandler { std::unique_ptr> MPlatformCache; std::unique_ptr MPlatformMapMutex; std::unique_ptr MFilterMutex; - std::unique_ptr> MPlugins; + std::unique_ptr> MPlugins; std::unique_ptr MDeviceFilterList; }; } // namespace detail diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 860cc5e0e7961..24a2e38b6eebf 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 178bd273f33bb..bc3582d2af590 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/kernel_program_cache.cpp b/sycl/source/detail/kernel_program_cache.cpp index 18b13f3fd589c..a6d80fe8a9e05 100644 --- a/sycl/source/detail/kernel_program_cache.cpp +++ b/sycl/source/detail/kernel_program_cache.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 478c1cc5cf3b8..59241ec4f5c3b 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/pi_hooks.cpp b/sycl/source/detail/pi_hooks.cpp new file mode 100644 index 0000000000000..51a630fd86d8b --- /dev/null +++ b/sycl/source/detail/pi_hooks.cpp @@ -0,0 +1,44 @@ +//==---------------- pi_hooks.cpp - SYCL standard source file --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef XPTI_ENABLE_INSTRUMENTATION +#include "xpti_trace_framework.hpp" +#endif // XPTI_ENABLE_INSTRUMENTATION + +#include +#include + +namespace pi { + +namespace config { + +TraceLevel trace_level_mask() { + using namespace cl::sycl::detail; + return static_cast(SYCLConfig::get()); +} + +pi::backend *backend() { + using namespace cl::sycl::detail; + return SYCLConfig::get(); +} + +pi::device_filter_list *device_filter_list() { + using namespace cl::sycl::detail; + return SYCLConfig::get(); +} + +} // namespace config + +#ifdef XPTI_ENABLE_INSTRUMENTATION + +xpti::trace_event_data_t *GSYCLGraphEvent = nullptr; + +const char *SYCL_STREAM_NAME = "sycl"; +#endif // XPTI_ENABLE_INSTRUMENTATION + +} // namespace pi diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index f86809d1e024e..4565b8585a226 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index a16f5de53456a..3aeb84baf8c57 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -8,11 +8,11 @@ #pragma once #include -#include +#include #include #include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -38,10 +38,10 @@ class platform_impl { /// \param APlatform is a raw plug-in platform handle. /// \param APlugin is a plug-in handle. explicit platform_impl(RT::PiPlatform APlatform, const plugin &APlugin) - : MPlatform(APlatform), MPlugin(std::make_shared(APlugin)) {} + : MPlatform(APlatform), MPlugin(std::make_shared(APlugin)) {} explicit platform_impl(RT::PiPlatform APlatform, - std::shared_ptr APlugin) + std::shared_ptr APlugin) : MPlatform(APlatform), MPlugin(APlugin) {} ~platform_impl() = default; @@ -128,7 +128,7 @@ class platform_impl { /// Sets the platform implementation to use another plugin. /// /// \param PluginPtr is a pointer to a plugin instance - void setPlugin(std::shared_ptr PluginPtr) { + void setPlugin(std::shared_ptr PluginPtr) { assert(!MHostPlatform && "Plugin is not available for Host"); MPlugin = std::move(PluginPtr); } @@ -194,7 +194,7 @@ class platform_impl { private: bool MHostPlatform = false; RT::PiPlatform MPlatform = 0; - std::shared_ptr MPlugin; + std::shared_ptr MPlugin; std::vector> MDeviceCache; std::mutex MDeviceMapMutex; }; diff --git a/sycl/source/detail/platform_info.hpp b/sycl/source/detail/platform_info.hpp index 1ba1c970e8fe4..951d8ac8b95f8 100644 --- a/sycl/source/detail/platform_info.hpp +++ b/sycl/source/detail/platform_info.hpp @@ -9,9 +9,9 @@ #pragma once #include #include -#include +#include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/plugin_sycl.hpp b/sycl/source/detail/plugin_sycl.hpp new file mode 100644 index 0000000000000..dbba7147efe43 --- /dev/null +++ b/sycl/source/detail/plugin_sycl.hpp @@ -0,0 +1,23 @@ +//==---------- plugin_sycl.hpp - SYCL wrapper for PI plugin ----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#pragma once + +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +using plugin = ::pi::plugin; + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index aea275b25e494..f7864ed05bade 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -8,12 +8,12 @@ #include #include -#include #include #include #include #include #include +#include #include #include diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 2507d56cfeb9c..e6c1ef50f4e70 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index e4caf0787c69b..6478435c51adc 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 1441a9f3e864d..1b3f55bca0f2a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 21513a035b732..f97e4a1ba6c9f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -43,6 +43,11 @@ #ifdef XPTI_ENABLE_INSTRUMENTATION #include "xpti_trace_framework.hpp" + +namespace pi { +// Global graph for the application +extern xpti::trace_event_data_t *GSYCLGraphEvent; +} // namespace pi #endif __SYCL_INLINE_NAMESPACE(cl) { @@ -50,8 +55,7 @@ namespace sycl { namespace detail { #ifdef XPTI_ENABLE_INSTRUMENTATION -// Global graph for the application -extern xpti::trace_event_data_t *GSYCLGraphEvent; +using pi::GSYCLGraphEvent; #endif #ifdef __SYCL_ENABLE_GNU_DEMANGLING @@ -773,7 +777,6 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; - for (const auto &Dep : MDeps) { if (Dep.MDepCommand == nullptr) continue; @@ -911,7 +914,6 @@ cl_int ReleaseCommand::enqueueImp() { // 3. Device alloca in the pair should be in active state in order to be // correctly released. - // There is no actual memory allocation if a host alloca command is created // being linked to a device allocation. SkipRelease |= CurAllocaIsHost && !MAllocaCmd->MIsLeaderAlloca; @@ -1991,15 +1993,16 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(RawEvents.size(), &RawEvents[0]); } std::vector ReqMemObjs; - // Extract the Mem Objects for all Requirements, to ensure they are available if - // a user ask for them inside the interop task scope - const auto& HandlerReq = ExecInterop->MRequirements; - std::for_each(std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement* Req) { - AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); - interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); - ReqMemObjs.emplace_back(ReqToMem); - }); + // Extract the Mem Objects for all Requirements, to ensure they are + // available if a user ask for them inside the interop task scope + const auto &HandlerReq = ExecInterop->MRequirements; + std::for_each( + std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement *Req) { + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); + interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); + ReqMemObjs.emplace_back(ReqToMem); + }); std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); interop_handler InteropHandler(std::move(ReqMemObjs), MQueue); diff --git a/sycl/source/detail/spec_constant_impl.cpp b/sycl/source/detail/spec_constant_impl.cpp index f61db6fe4a0bc..000b58fa4165d 100644 --- a/sycl/source/detail/spec_constant_impl.cpp +++ b/sycl/source/detail/spec_constant_impl.cpp @@ -9,9 +9,9 @@ #include #include -#include #include #include +#include #include #include diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 82cddfddb8b00..3aef4122523d0 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index d6acee67293cf..5c78364adfbec 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -194,7 +194,7 @@ void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt) { void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q) { return aligned_alloc_host(Alignment, Size, Q.get_context()); -} +} void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev, const context &Ctxt) { diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 57ca9f6190615..b71a97e880730 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -13,6 +13,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index fa36438ae09b5..a80153d5c7dc3 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/source/interop_handler.cpp b/sycl/source/interop_handler.cpp index f6fb55ad28120..b37ed26de282e 100644 --- a/sycl/source/interop_handler.cpp +++ b/sycl/source/interop_handler.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 3ab9f5b020d6d..28a568645746f 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -12,6 +12,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt index 83c02bd94481f..1ed6a84359ede 100644 --- a/sycl/tools/CMakeLists.txt +++ b/sycl/tools/CMakeLists.txt @@ -2,22 +2,13 @@ add_subdirectory(sycl-ls) # TODO: move each tool in its own sub-directory add_executable(get_device_count_by_type get_device_count_by_type.cpp) -add_dependencies(get_device_count_by_type ocl-headers ocl-icd level-zero-loader) - -if(MSVC) - set(LEVEL_ZERO_LIBRARY - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}") -else() - set(LEVEL_ZERO_LIBRARY - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}") -endif() +add_dependencies(get_device_count_by_type ocl-headers ocl-icd) target_link_libraries(get_device_count_by_type PRIVATE OpenCL::Headers - LevelZeroLoader::Headers ${OpenCL_LIBRARIES} - ${LEVEL_ZERO_LIBRARY} + level_zero_lib $<$:cudadrv> ) target_compile_definitions(get_device_count_by_type diff --git a/sycl/tools/sycl-ls/CMakeLists.txt b/sycl/tools/sycl-ls/CMakeLists.txt index 5833d617d9c4e..16dd5115e2be8 100644 --- a/sycl/tools/sycl-ls/CMakeLists.txt +++ b/sycl/tools/sycl-ls/CMakeLists.txt @@ -5,6 +5,7 @@ target_link_libraries(sycl-ls PRIVATE sycl OpenCL::Headers + piapi::piapi ) install(TARGETS sycl-ls RUNTIME DESTINATION "bin" COMPONENT sycl-ls) diff --git a/sycl/unittests/helpers/PiMock.hpp b/sycl/unittests/helpers/PiMock.hpp index aad34cba434b5..495e94edb0b97 100644 --- a/sycl/unittests/helpers/PiMock.hpp +++ b/sycl/unittests/helpers/PiMock.hpp @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include @@ -53,7 +53,7 @@ namespace RT = detail::pi; decltype(&::api) FuncPtr) { \ MPlugin->PiFunctionTable.api = FuncPtr; \ } -#include +#include #undef _PI_API /// The PiMock class wraps an instance of a SYCL platform class, diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index d7c6ed6fbf4b2..3246aa759c2d0 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "CL/sycl/detail/pi.h" +#include "CL/sycl/detail/pi_sycl.hpp" #include "detail/context_impl.hpp" #include "detail/kernel_program_cache.hpp" #include "detail/program_impl.hpp" diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp index 7f051f5ab6790..588d91c6c7a99 100644 --- a/sycl/unittests/pi/BackendString.hpp +++ b/sycl/unittests/pi/BackendString.hpp @@ -4,13 +4,13 @@ #pragma once -#include +#include namespace pi { -inline const char *GetBackendString(cl::sycl::backend backend) { +inline const char *GetBackendString(pi::backend backend) { switch (backend) { #define PI_BACKEND_STR(backend_name) \ - case cl::sycl::backend::backend_name: \ + case pi::backend::backend_name: \ return #backend_name PI_BACKEND_STR(cuda); PI_BACKEND_STR(host); diff --git a/sycl/unittests/pi/EnqueueMemTest.cpp b/sycl/unittests/pi/EnqueueMemTest.cpp index 8abf016e5b322..7e42da1674f3c 100644 --- a/sycl/unittests/pi/EnqueueMemTest.cpp +++ b/sycl/unittests/pi/EnqueueMemTest.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "TestGetPlugin.hpp" -#include -#include +#include +#include #include using namespace cl::sycl; diff --git a/sycl/unittests/pi/PlatformTest.cpp b/sycl/unittests/pi/PlatformTest.cpp index c3d04721cc992..3e6383f3e0cf9 100644 --- a/sycl/unittests/pi/PlatformTest.cpp +++ b/sycl/unittests/pi/PlatformTest.cpp @@ -8,8 +8,8 @@ #include "TestGetPlugin.hpp" #include -#include -#include +#include +#include #include #include diff --git a/sycl/unittests/pi/TestGetPlugin.hpp b/sycl/unittests/pi/TestGetPlugin.hpp index 6512b111f8123..950559b300920 100644 --- a/sycl/unittests/pi/TestGetPlugin.hpp +++ b/sycl/unittests/pi/TestGetPlugin.hpp @@ -6,11 +6,11 @@ #include "BackendString.hpp" #include -#include +#include #include namespace pi { -inline cl::sycl::detail::plugin initializeAndGet(cl::sycl::backend backend) { +inline cl::sycl::detail::plugin initializeAndGet(pi::backend backend) { auto plugins = cl::sycl::detail::pi::initialize(); auto it = std::find_if(plugins.begin(), plugins.end(), [=](cl::sycl::detail::plugin p) -> bool { diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index 1ae64f981931c..aaac1e4410dd1 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -12,10 +12,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include -#include +#include +#include +#include +#include #include const unsigned int LATEST_KNOWN_CUDA_DRIVER_API_VERSION = 3020u; diff --git a/sycl/unittests/pi/cuda/test_commands.cpp b/sycl/unittests/pi/cuda/test_commands.cpp index 3235391f93425..9092abb945b19 100644 --- a/sycl/unittests/pi/cuda/test_commands.cpp +++ b/sycl/unittests/pi/cuda/test_commands.cpp @@ -12,9 +12,9 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_device.cpp b/sycl/unittests/pi/cuda/test_device.cpp index 7c8a5d756c3c7..a1f1ef13d1a88 100644 --- a/sycl/unittests/pi/cuda/test_device.cpp +++ b/sycl/unittests/pi/cuda/test_device.cpp @@ -12,9 +12,9 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp index 584f73ad45c00..4bc2a16e67e5a 100644 --- a/sycl/unittests/pi/cuda/test_interop_get_native.cpp +++ b/sycl/unittests/pi/cuda/test_interop_get_native.cpp @@ -13,6 +13,7 @@ #include #include #include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index 0b57e7eb82790..3943fdf77d28e 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -12,9 +12,9 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include +#include +#include +#include // PI CUDA kernels carry an additional argument for the implicit global offset. #define NUM_IMPLICIT_ARGS 1 diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp index 2441cdd14f3b0..ae45ef3e92a1c 100644 --- a/sycl/unittests/pi/cuda/test_mem_obj.cpp +++ b/sycl/unittests/pi/cuda/test_mem_obj.cpp @@ -12,10 +12,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include -#include +#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp index da0036a8ebc5b..a8d383a65c6bc 100644 --- a/sycl/unittests/pi/cuda/test_primary_context.cpp +++ b/sycl/unittests/pi/cuda/test_primary_context.cpp @@ -13,7 +13,7 @@ #include "TestGetPlatforms.hpp" #include #include -#include +#include #include diff --git a/sycl/unittests/pi/cuda/test_queue.cpp b/sycl/unittests/pi/cuda/test_queue.cpp index 39ee2731df03a..0a7c49a0321b1 100644 --- a/sycl/unittests/pi/cuda/test_queue.cpp +++ b/sycl/unittests/pi/cuda/test_queue.cpp @@ -12,10 +12,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include -#include +#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_sampler_properties.cpp b/sycl/unittests/pi/cuda/test_sampler_properties.cpp index c4a19d8dc9e93..b3fa008d12976 100644 --- a/sycl/unittests/pi/cuda/test_sampler_properties.cpp +++ b/sycl/unittests/pi/cuda/test_sampler_properties.cpp @@ -8,9 +8,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include + +#include #include +#include #include namespace {