diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 9ed961c0904f0..ca46824df2a5b 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -121,15 +121,43 @@ else() ) endif() +if(MSVC) + set(shared_library_dirname "bin") +else() + set(shared_library_dirname "lib${LLVM_LIBDIR_SUFFIX}") +endif() + +# Retrieves the name of the library +# For example: +# get_library_path(libname "OpenCL" "external" SHARED) +# would likely produce (depending on the system): +# set(libname "external/libOpenCL.so") +function(get_library_path out name directory library_type) + set(library_prefix "${CMAKE_${library_type}_LIBRARY_PREFIX}") + set(library_suffix "${CMAKE_${library_type}_LIBRARY_SUFFIX}") + if(NOT ("${directory}" STREQUAL "")) + set(directory "${directory}/") + endif() + set(${out} + "${directory}${library_prefix}${name}${library_suffix}" PARENT_SCOPE) +endfunction() + +# Retrieves the name of the library needed when linking +# See get_library_path +function(get_link_library_path out name directory) + if(MSVC) + get_library_path(tmp_out "${name}" "${directory}" STATIC) + else() + get_library_path(tmp_out "${name}" "${directory}" SHARED) + endif() + set(${out} "${tmp_out}" PARENT_SCOPE) +endfunction() + if( NOT OpenCL_LIBRARIES ) message(STATUS "OpenCL_LIBRARIES is missing. Will try to download OpenCL ICD Loader from github.com") + get_link_library_path(OpenCL_LIBRARIES "OpenCL" "${LLVM_LIBRARY_OUTPUT_INTDIR}") if(MSVC) - set(OpenCL_LIBRARIES - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}OpenCL${CMAKE_STATIC_LIBRARY_SUFFIX}") list(APPEND AUX_CMAKE_FLAGS -DOPENCL_ICD_LOADER_REQUIRE_WDK=OFF) - else() - set(OpenCL_LIBRARIES - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}OpenCL${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() if (CMAKE_C_COMPILER) list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) @@ -260,6 +288,9 @@ if (LLVM_ENABLE_ASSERTIONS AND NOT SYCL_DISABLE_STL_ASSERTIONS) endif() endif() +include(piapi.cmake) +add_piapi_library() + set(SYCL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # SYCL runtime library @@ -311,9 +342,6 @@ option(SYCL_INCLUDE_TESTS "Generate build targets for the SYCL unit tests." ${LLVM_INCLUDE_TESTS}) -# Plugin Library -add_subdirectory( plugins ) - add_subdirectory(tools) if(SYCL_INCLUDE_TESTS) @@ -362,6 +390,11 @@ if(SYCL_BUILD_PI_CUDA) "CUDA support requires adding \"libclc\" to the CMake argument \"LLVM_ENABLE_PROJECTS\"") endif() + find_package(CUDA 10.1 REQUIRED) + + set_target_properties(pi_cuda PROPERTIES + INTERFACE_LINK_LIBRARIES cudadrv + ) add_dependencies(sycl-toolchain libspirv-builtins pi_cuda) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_cuda) endif() diff --git a/sycl/cmake/modules/AddSYCLExecutable.cmake b/sycl/cmake/modules/AddSYCLExecutable.cmake index e30463e4b79c4..12a97450b402a 100644 --- a/sycl/cmake/modules/AddSYCLExecutable.cmake +++ b/sycl/cmake/modules/AddSYCLExecutable.cmake @@ -2,7 +2,7 @@ macro(add_sycl_executable ARG_TARGET_NAME) cmake_parse_arguments(ARG "" "" - "OPTIONS;SOURCES;LIBRARIES;DEPENDANTS" + "OPTIONS;SOURCES;LIBRARIES;STATIC_LIBS;DEPENDANTS" ${ARGN}) set(CXX_COMPILER clang++) @@ -11,6 +11,7 @@ macro(add_sycl_executable ARG_TARGET_NAME) set(LIB_POSTFIX ".lib") else() set(LIB_PREFIX "-l") + set(SPLIT_LINK_PATH ON) endif() set(DEVICE_COMPILER_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/${CXX_COMPILER}) @@ -18,6 +19,16 @@ macro(add_sycl_executable ARG_TARGET_NAME) foreach(_lib ${ARG_LIBRARIES}) list(APPEND LINKED_LIBS "${LIB_PREFIX}${_lib}${LIB_POSTFIX}") endforeach() + foreach(_lib ${ARG_STATIC_LIBS}) + if(SPLIT_LINK_PATH) + # Note this has to be added separately so that CMake doesn't get confused + # by the space in between the two arguments + list(APPEND LINKED_LIBS "-L$") + list(APPEND LINKED_LIBS "-l:$") + else() + list(APPEND LINKED_LIBS $) + endif() + endforeach() if (LLVM_ENABLE_ASSERTIONS AND NOT SYCL_DISABLE_STL_ASSERTIONS) if(SYCL_USE_LIBCXX) diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index c2ddac290601d..9c84303818731 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -90,12 +90,19 @@ macro(add_sycl_unittest_with_device test_dirname link_variant) endif() if ("${link_variant}" MATCHES "OBJECT") + # TODO piapi integration should be fixed, + # replace it with target_link_libraries(${test_dirname} PRIVATE piapi::piapi) + # once add_sycl_executable supports that + set(pi_include_dir "${sycl_inc_dir}/../piapi/include") add_sycl_executable(${test_dirname} - OPTIONS -nolibsycl ${COMMON_OPTS} ${LLVM_PTHREAD_LIB} ${TERMINFO_LIB} + OPTIONS -nolibsycl ${COMMON_OPTS} ${LLVM_PTHREAD_LIB} ${TERMINFO_LIB} ${piapi_options} -I "${pi_include_dir}" -DPI_DPCPP_INTEGRATION SOURCES ${ARGN} $ LIBRARIES gtest_main gtest LLVMSupport LLVMTestingSupport OpenCL ${EXTRA_LIBS} + STATIC_LIBS piapi DEPENDANTS SYCLUnitTests) + add_dependencies(${test_dirname}_exec piapi) else() # TODO support shared library case. endif() + #target_link_libraries(${test_dirname}_exec PRIVATE piapi::piapi) endmacro() diff --git a/sycl/doc/PluginInterface.md b/sycl/doc/PluginInterface.md index 99c2626b89d47..730db708826fc 100644 --- a/sycl/doc/PluginInterface.md +++ b/sycl/doc/PluginInterface.md @@ -72,8 +72,8 @@ SYCL_PI_TRACE=-1 lists all PI Traces above and more debug messages. #### Plugin binary interface Plugins should implement all the Interface APIs required for the PI Version -it supports. There is [pi.def](../include/CL/sycl/detail/pi.def)/ -[pi.h](../include/CL/sycl/detail/pi.h) file listing all PI API names that can be +it supports. There is [pi.def](../piapi/include/pi/pi.def)/ +[pi.h](../piapi/include/pi/pi.h) file listing all PI API names that can be called by the specific version of Plugin Interface. It exports a function - "piPluginInit" that returns the plugin details and function pointer table containing the list of pointers to implemented Interface @@ -136,7 +136,7 @@ into - **Interoperability API** which allows interoperability with underlying runtimes such as OpenCL. -See [pi.h](../include/CL/sycl/detail/pi.h) header for the full list and +See [pi.h](../piapi/include/pi/pi.h) header for the full list and descriptions of PI APIs. ### The Core OpenCL-based PI APIs diff --git a/sycl/include/CL/sycl/backend/cuda.hpp b/sycl/include/CL/sycl/backend/cuda.hpp index 0bc3fa6f4b0aa..3e1557697f0bb 100644 --- a/sycl/include/CL/sycl/backend/cuda.hpp +++ b/sycl/include/CL/sycl/backend/cuda.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -58,4 +59,4 @@ struct interop +#include #include #include #include #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { - -enum class backend : char { host, opencl, level_zero, cuda, all }; - -template struct interop; - +namespace pi { inline std::ostream &operator<<(std::ostream &Out, backend be) { switch (be) { case backend::host: @@ -41,6 +36,18 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) { } return Out; } +} // namespace pi + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +using backend = pi::backend; + +template struct interop; + +inline std::ostream &operator<<(std::ostream &Out, backend be) { + return pi::operator<<(Out, be); +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/common.hpp b/sycl/include/CL/sycl/detail/common.hpp index b8b2fb68e1400..36d7afa82f73f 100644 --- a/sycl/include/CL/sycl/detail/common.hpp +++ b/sycl/include/CL/sycl/detail/common.hpp @@ -20,6 +20,14 @@ #define __SYCL_STRINGIFY_LINE_HELP(s) #s #define __SYCL_STRINGIFY_LINE(s) __SYCL_STRINGIFY_LINE_HELP(s) +#ifdef XPTI_ENABLE_INSTRUMENTATION +namespace pi { +// We define a sycl stream name and this will be used by the instrumentation +// framework +extern const char *SYCL_STREAM_NAME; +} // namespace pi +#endif // XPTI_ENABLE_INSTRUMENTATION + // Default signature enables the passing of user code location information to // public methods as a default argument. If the end-user wants to disable the // code location information, they must compile the code with @@ -27,11 +35,11 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { -// We define a sycl stream name and this will be used by the instrumentation -// framework -constexpr const char *SYCL_STREAM_NAME = "sycl"; -// Stream name being used for traces generated from the SYCL plugin layer -constexpr const char *SYCL_PICALL_STREAM_NAME = "sycl.pi"; + +#ifdef XPTI_ENABLE_INSTRUMENTATION +using pi::SYCL_STREAM_NAME; +#endif // XPTI_ENABLE_INSTRUMENTATION + // Data structure that captures the user code location information using the // builtin capabilities of the compiler struct code_location { diff --git a/sycl/include/CL/sycl/detail/device_binary_image.hpp b/sycl/include/CL/sycl/detail/device_binary_image.hpp index a74523801175d..6e458fa5547de 100644 --- a/sycl/include/CL/sycl/detail/device_binary_image.hpp +++ b/sycl/include/CL/sycl/detail/device_binary_image.hpp @@ -8,7 +8,7 @@ #pragma once #include -#include +#include #include diff --git a/sycl/include/CL/sycl/detail/device_filter.hpp b/sycl/include/CL/sycl/detail/device_filter.hpp index b65cf709d9dc0..88a74ac126070 100644 --- a/sycl/include/CL/sycl/detail/device_filter.hpp +++ b/sycl/include/CL/sycl/detail/device_filter.hpp @@ -11,44 +11,16 @@ #include #include #include +#include #include #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { - -struct device_filter { - backend Backend = backend::all; - info::device_type DeviceType = info::device_type::all; - int DeviceNum = 0; - bool HasBackend = false; - bool HasDeviceType = false; - bool HasDeviceNum = false; - int MatchesSeen = 0; - - device_filter(){}; - device_filter(const std::string &FilterString); - friend std::ostream &operator<<(std::ostream &Out, - const device_filter &Filter); -}; - -class device_filter_list { - std::vector FilterList; - -public: - device_filter_list() {} - device_filter_list(const std::string &FilterString); - device_filter_list(device_filter &Filter); - void addFilter(device_filter &Filter); - std::vector &get() { return FilterList; } - friend std::ostream &operator<<(std::ostream &Out, - const device_filter_list &List); -}; +namespace pi { inline std::ostream &operator<<(std::ostream &Out, const device_filter &Filter) { + namespace info = cl::sycl::info; Out << Filter.Backend << ":"; if (Filter.DeviceType == info::device_type::host) { Out << "host"; @@ -78,6 +50,15 @@ inline std::ostream &operator<<(std::ostream &Out, return Out; } +} // namespace pi + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +using pi::device_filter; +using pi::device_filter_list; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/helpers.hpp b/sycl/include/CL/sycl/detail/helpers.hpp index c6454ed01151e..d4b8efbe674a8 100644 --- a/sycl/include/CL/sycl/detail/helpers.hpp +++ b/sycl/include/CL/sycl/detail/helpers.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #if __cpp_lib_bit_cast diff --git a/sycl/include/CL/sycl/detail/pi_sycl.hpp b/sycl/include/CL/sycl/detail/pi_sycl.hpp new file mode 100644 index 0000000000000..3ac6998d51ca8 --- /dev/null +++ b/sycl/include/CL/sycl/detail/pi_sycl.hpp @@ -0,0 +1,29 @@ +//==---------------- pi_sycl.hpp - SYCL wrapper for PI ---------*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#pragma once + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +namespace RT = ::pi; + +namespace detail { + +namespace RT = ::pi; +using PiApiKind = ::PiApiKind; +namespace pi { +using namespace ::pi; +} + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp index 274f0511a5361..80e37c135fca4 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_i.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp index 22ebb8ee97109..f305d6c1dee86 100644 --- a/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp +++ b/sycl/include/CL/sycl/detail/sycl_mem_obj_t.hpp @@ -22,6 +22,10 @@ #include #include +namespace pi { +class plugin; +} + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -29,7 +33,6 @@ namespace detail { // Forward declarations class context_impl; class event_impl; -class plugin; using ContextImplPtr = shared_ptr_class; using EventImplPtr = shared_ptr_class; @@ -86,7 +89,7 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { virtual ~SYCLMemObjT() = default; - const plugin &getPlugin() const; + const pi::plugin &getPlugin() const; __SYCL_DLL_LOCAL size_t getSize() const override { return MSizeInBytes; } __SYCL_DLL_LOCAL size_t get_count() const { diff --git a/sycl/include/CL/sycl/exception.hpp b/sycl/include/CL/sycl/exception.hpp index 0004bc311a4cd..007d8555e467a 100644 --- a/sycl/include/CL/sycl/exception.hpp +++ b/sycl/include/CL/sycl/exception.hpp @@ -12,8 +12,8 @@ #include #include -#include #include +#include #include diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index a1c368cdc5519..6a0ff02b123f8 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { @@ -133,16 +133,7 @@ enum class device : cl_device_info { usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT }; -enum class device_type : pi_uint64 { - cpu = PI_DEVICE_TYPE_CPU, - gpu = PI_DEVICE_TYPE_GPU, - accelerator = PI_DEVICE_TYPE_ACC, - // TODO: figure out if we need all the below in PI - custom = CL_DEVICE_TYPE_CUSTOM, - automatic, - host, - all = CL_DEVICE_TYPE_ALL -}; +using device_type = pi::device_type; enum class partition_property : cl_device_partition_property { no_partition = 0, diff --git a/sycl/include/CL/sycl/interop_handle.hpp b/sycl/include/CL/sycl/interop_handle.hpp index a09d8cee7ccaa..c3235e55b28e4 100644 --- a/sycl/include/CL/sycl/interop_handle.hpp +++ b/sycl/include/CL/sycl/interop_handle.hpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include diff --git a/sycl/piapi.cmake b/sycl/piapi.cmake new file mode 100644 index 0000000000000..b643a68304cc5 --- /dev/null +++ b/sycl/piapi.cmake @@ -0,0 +1,36 @@ +set(xpti_include_dir "") +if(SYCL_ENABLE_XPTI_TRACING) + set(xpti_include_dir "${LLVM_EXTERNAL_XPTI_SOURCE_DIR}/include") +endif() + +# Set values for the library +set(PI_BUILD_CUDA "${SYCL_BUILD_PI_CUDA}") +set(PI_BUILD_LEVEL_ZERO ON) +set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE "${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE}") +set(PI_LIBDIR_SUFFIX "${LLVM_LIBDIR_SUFFIX}") +set(PI_XPTI_INCLUDE_DIR "${xpti_include_dir}") + +# Include the library +add_subdirectory(piapi) + +# Enables the piapi library +function(add_piapi_library) + add_library(piapi::piapi ALIAS piapi) +endfunction() + +# PI has special code for DPC++ integration +target_compile_definitions(piapi INTERFACE PI_DPCPP_INTEGRATION) +target_include_directories(piapi + INTERFACE + "${sycl_inc_dir}" +) + +if (MSVC AND CMAKE_BUILD_TYPE MATCHES "Debug") + set(XPTI_LIB xptid) +else() + set(XPTI_LIB xpti) +endif() +if(SYCL_ENABLE_XPTI_TRACING) + #target_link_libraries(pi_level_zero PRIVATE ${XPTI_LIB}) +endif() + diff --git a/sycl/piapi/CMakeLists.txt b/sycl/piapi/CMakeLists.txt new file mode 100644 index 0000000000000..812f891404229 --- /dev/null +++ b/sycl/piapi/CMakeLists.txt @@ -0,0 +1,165 @@ +cmake_minimum_required(VERSION 3.4) +project(piapi VERSION 2.0 LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +include(ExternalProject) +find_package(Threads REQUIRED) + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +################################################################################ +# Options + +# TODO: Not possible to turn this off at the moment +option(PI_OPENCL_HEADERS "Should OpenCL headers be included" ON) + +option(PI_BUILD_CUDA "Whether to build the pi_cuda plugin" OFF) +option(PI_BUILD_LEVEL_ZERO "Whether to build the pi_level_zero plugin" OFF) + +set(PI_LIBDIR_SUFFIX "" CACHE STRING "Library suffix when installing") +set(PI_XPTI_INCLUDE_DIR "" CACHE STRING "Location of the XPTI include directory") + +################################################################################ +# Main library + +set(pi_include_dir "${PROJECT_SOURCE_DIR}/include") +set(pi_source_dir "${PROJECT_SOURCE_DIR}/src") +set(pi_library_base_dir "${CMAKE_CURRENT_BINARY_DIR}") +set(pi_library_output_dir "${pi_library_base_dir}/lib${PI_LIBDIR_SUFFIX}") +set(pi_binary_output_dir "${pi_library_base_dir}/bin") + +set(CONF_INCLUDE_DIRS "") + +add_library(piapi STATIC + "${pi_include_dir}/pi/pi.h" + "${pi_include_dir}/pi/pi.hpp" + "${pi_source_dir}/pi.cpp" + "$<$:${pi_source_dir}/windows_pi.cpp>" + "$<$,$>:${pi_source_dir}/posix_pi.cpp>" +) +set_target_properties(piapi PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" +) +list(APPEND CONF_INCLUDE_DIRS "${pi_include_dir}") +list(APPEND CONF_INCLUDE_DIRS "${CMAKE_CURRENT_BINARY_DIR}") +target_include_directories(piapi PUBLIC + "$" + "$" +) +target_include_directories(piapi INTERFACE + "$" +) +target_link_libraries(piapi PUBLIC ${CMAKE_DL_LIBS}) +target_link_libraries(piapi PUBLIC Threads::Threads) +target_compile_definitions(piapi PUBLIC CL_TARGET_OPENCL_VERSION=210) + +################################################################################ +# Export header + +set(CMAKE_CXX_VISIBILITY_PRESET hidden) +set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) + +# Define export header for the plugins +# We don't have the plugins yet, so we rely on a shim shared library +set(pi_export_header "${CMAKE_CURRENT_BINARY_DIR}/piapi_export.h") +add_library(pi_export_library_shim EXCLUDE_FROM_ALL SHARED "${pi_source_dir}/shim.cpp") +include(GenerateExportHeader) +generate_export_header(pi_export_library_shim + BASE_NAME PIAPI + EXPORT_MACRO_NAME PIAPI_EXPORT + EXPORT_FILE_NAME "${pi_export_header}" + STATIC_DEFINE PIAPI_STATIC +) + +################################################################################ +# OpenCL + +if(PI_OPENCL_HEADERS) + find_package(OpenCL REQUIRED) + + # Do not link against the OpenCL::OpenCL library, just need the headers + target_include_directories(piapi PUBLIC + "${OpenCL_INCLUDE_DIR}" + ) +endif() + +################################################################################ +# XPTI tracing + +if(PI_XPTI_INCLUDE_DIR) + target_compile_definitions(piapi PRIVATE XPTI_ENABLE_INSTRUMENTATION=1) + target_include_directories(piapi PUBLIC "${PI_XPTI_INCLUDE_DIR}") +endif() + +################################################################################ +# Install + +# Export the package for use from the build-tree +export(PACKAGE piapi) + +install(DIRECTORY "${pi_include_dir}" + DESTINATION . COMPONENT piapi +) +install(TARGETS piapi + EXPORT piapiTargets + ARCHIVE DESTINATION "lib${PI_LIBDIR_SUFFIX}" COMPONENT piapi + RUNTIME DESTINATION "bin" COMPONENT piapi +) + +################################################################################ +# Interface library that simplifies exporting symbols for the plugins + +add_library(pi_export_library INTERFACE) +target_include_directories(pi_export_library INTERFACE "${pi_library_output_dir}") +target_compile_definitions(pi_export_library INTERFACE + pi_export_library_shim_EXPORTS + PI_REQUIRES_EXPORT +) +target_sources(pi_export_library INTERFACE + "${pi_source_dir}/plugin_pi_hooks.cpp" +) + +################################################################################ +# Tests + +enable_testing() + +################################################################################ +# Plugins + +add_subdirectory(plugins) + +################################################################################ +# Package + +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/piapiConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY ExactVersion +) + +configure_file(piapiConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/piapiConfig.cmake + COPYONLY +) + +set(config_package_location lib/cmake/piapi) +install(EXPORT piapiTargets + FILE piapiTargets.cmake + NAMESPACE piapi:: + DESTINATION ${config_package_location} + COMPONENT piapi +) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/piapiConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/piapiConfigVersion.cmake + DESTINATION ${config_package_location} + COMPONENT piapi +) diff --git a/sycl/piapi/include/pi/device_filter.hpp b/sycl/piapi/include/pi/device_filter.hpp new file mode 100644 index 0000000000000..16f573aab7b82 --- /dev/null +++ b/sycl/piapi/include/pi/device_filter.hpp @@ -0,0 +1,46 @@ +//==---------- pi_device_filter.hpp - PI device filter ---------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "pi.hpp" + +#include +#include + +namespace pi { + +struct device_filter { + backend Backend = backend::all; + pi::device_type DeviceType = pi::device_type::all; + int DeviceNum = 0; + bool HasBackend = false; + bool HasDeviceType = false; + bool HasDeviceNum = false; + int MatchesSeen = 0; + + device_filter(){}; + device_filter(const std::string &FilterString); + friend std::ostream &operator<<(std::ostream &Out, + const device_filter &Filter); +}; + +class device_filter_list { + std::vector FilterList; + +public: + device_filter_list() {} + device_filter_list(const std::string &FilterString); + device_filter_list(device_filter &Filter); + void addFilter(device_filter &Filter); + std::vector &get() { return FilterList; } + friend std::ostream &operator<<(std::ostream &Out, + const device_filter_list &List); +}; + +} // namespace pi diff --git a/sycl/include/CL/sycl/detail/pi.def b/sycl/piapi/include/pi/pi.def similarity index 100% rename from sycl/include/CL/sycl/detail/pi.def rename to sycl/piapi/include/pi/pi.def diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/piapi/include/pi/pi.h similarity index 99% rename from sycl/include/CL/sycl/detail/pi.h rename to sycl/piapi/include/pi/pi.h index 72c2dcceb0c26..28cafc49c4346 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/piapi/include/pi/pi.h @@ -22,6 +22,20 @@ #ifndef _PI_H_ #define _PI_H_ +#ifdef PI_REQUIRES_EXPORT +#include +#else +#define PIAPI_EXPORT +#endif // PI_REQUIRES_EXPORT + +#ifdef PI_DPCPP_INTEGRATION +#include +#else +#ifndef __SYCL_EXPORT +#define __SYCL_EXPORT PIAPI_EXPORT +#endif // __SYCL_EXPORT +#endif // PI_DPCPP_INTEGRATION + // Every single change in PI API should be accompanied with the minor // version increase (+1). In the cases where backward compatibility is not // maintained there should be a (+1) change to the major version in @@ -34,9 +48,13 @@ // pi_device_binary_property_set PropertySetsBegin; // pi_device_binary_property_set PropertySetsEnd; // 2. A number of types needed to define pi_device_binary_property_set added. +// -- Version 2.0: +// * PI as a standalone library +// * Everything moved out of the cl::sycl namespace into the pi namespace +// * Global C++ objects moved into pi namespace and made external // -#define _PI_H_VERSION_MAJOR 1 -#define _PI_H_VERSION_MINOR 2 +#define _PI_H_VERSION_MAJOR 2 +#define _PI_H_VERSION_MINOR 0 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -45,9 +63,9 @@ // TODO: we need a mapping of PI to OpenCL somewhere, and this can be done // elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now // done here, for efficiency and simplicity. +#include +#include #include -#include -#include #include #ifdef __cplusplus @@ -1573,7 +1591,7 @@ struct _pi_plugin { char *Targets; struct FunctionPointers { #define _PI_API(api) decltype(::api) *api; -#include +#include } PiFunctionTable; }; diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/piapi/include/pi/pi.hpp similarity index 87% rename from sycl/include/CL/sycl/detail/pi.hpp rename to sycl/piapi/include/pi/pi.hpp index 3509f9a73cbb4..12eece332542f 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/piapi/include/pi/pi.hpp @@ -13,14 +13,12 @@ #pragma once -#include -#include -#include -#include -#include +#include #include #include +#include +#include #include #include #include @@ -32,20 +30,30 @@ struct trace_event_data_t; } #endif -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { - -class context; - -namespace detail { - enum class PiApiKind { #define _PI_API(api) api, -#include +#include }; -class plugin; + namespace pi { +enum class backend : char { host, opencl, level_zero, cuda, all }; + +enum class device_type : pi_uint64 { + cpu = PI_DEVICE_TYPE_CPU, + gpu = PI_DEVICE_TYPE_GPU, + accelerator = PI_DEVICE_TYPE_ACC, + // TODO: figure out if we need all the below in PI + custom = CL_DEVICE_TYPE_CUSTOM, + automatic, + host, + all = CL_DEVICE_TYPE_ALL +}; + +// Forward declarations +class plugin; +class device_filter_list; + // The SYCL_PI_TRACE sets what we will trace. // This is a bit-mask of various things we'd want to trace. enum TraceLevel { @@ -54,23 +62,29 @@ enum TraceLevel { PI_TRACE_ALL = -1 }; +namespace config { +extern TraceLevel trace_level_mask(); +extern pi::backend *backend(); +extern pi::device_filter_list *device_filter_list(); +} // namespace config + // Return true if we want to trace PI related activities. bool trace(TraceLevel level); -#ifdef __SYCL_RT_OS_WINDOWS -#define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" -#define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" +#ifdef _WIN32 +#define PI_OPENCL_PLUGIN_NAME "pi_opencl.dll" +#define PI_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" +#define PI_CUDA_PLUGIN_NAME "pi_cuda.dll" #else -#define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.so" -#define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" -#define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.so" +#define PI_OPENCL_PLUGIN_NAME "libpi_opencl.so" +#define PI_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" +#define PI_CUDA_PLUGIN_NAME "libpi_cuda.so" #endif // Report error and no return (keeps compiler happy about no return statements). -[[noreturn]] __SYCL_EXPORT void die(const char *Message); +[[noreturn]] PIAPI_EXPORT void die(const char *Message); -__SYCL_EXPORT void assertion(bool Condition, const char *Message = nullptr); +PIAPI_EXPORT void assertion(bool Condition, const char *Message = nullptr); template void handleUnknownParamName(const char *functionName, T parameter) { @@ -85,8 +99,8 @@ void handleUnknownParamName(const char *functionName, T parameter) { // This macro is used to report invalid enumerators being passed to PI API // GetInfo functions. It will print the name of the function that invoked it // and the value of the unknown enumerator. -#define __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(parameter) \ - { cl::sycl::detail::pi::handleUnknownParamName(__func__, parameter); } +#define PI_HANDLE_UNKNOWN_PARAM_NAME(parameter) \ + { pi::handleUnknownParamName(__func__, parameter); } using PiPlugin = ::pi_plugin; using PiResult = ::pi_result; @@ -115,10 +129,6 @@ using PiMemObjectType = ::pi_mem_type; using PiMemImageChannelOrder = ::pi_image_channel_order; using PiMemImageChannelType = ::pi_image_channel_type; -__SYCL_EXPORT void contextSetExtendedDeleter(const cl::sycl::context &constext, - pi_context_extended_deleter func, - void *user_data); - // Function to load the shared library // Implementation is OS dependent. void *loadOsLibrary(const std::string &Library); @@ -133,13 +143,8 @@ std::string platformInfoToString(pi_platform_info info); // Want all the needed casts be explicit, do not define conversion operators. template To cast(From value); -// Holds the PluginInformation for the plugin that is bound. -// Currently a global variable is used to store OpenCL plugin information to be -// used with SYCL Interoperability Constructors. -extern std::shared_ptr GlobalPlugin; - // Performs PI one-time initialization. -const vector_class &initialize(); +const std::vector &initialize(); // Get the plugin serving given backend. template const plugin &getPlugin(); @@ -155,7 +160,7 @@ template struct PiFuncInfo {}; return MPlugin.PiFunctionTable.api; \ } \ }; -#include +#include /// Emits an XPTI trace before a PI API call is made /// \param FName The name of the PI API call @@ -388,8 +393,6 @@ PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, } // namespace pi -namespace RT = cl::sycl::detail::pi; - // Workaround for build with GCC 5.x // An explicit specialization shall be declared in the namespace block. // Having namespace as part of template name is not supported by GCC @@ -400,26 +403,19 @@ namespace pi { // operators. template inline To cast(From value) { // TODO: see if more sanity checks are possible. - RT::assertion((sizeof(From) == sizeof(To)), "assert: cast failed size check"); + pi::assertion((sizeof(From) == sizeof(To)), "assert: cast failed size check"); return (To)(value); } // These conversions should use PI interop API. template <> inline pi::PiProgram cast(cl_program) { - RT::assertion(false, "pi::cast -> use piextCreateProgramWithNativeHandle"); + pi::assertion(false, "pi::cast -> use piextCreateProgramWithNativeHandle"); return {}; } template <> inline pi::PiDevice cast(cl_device_id) { - RT::assertion(false, "pi::cast -> use piextCreateDeviceWithNativeHandle"); + pi::assertion(false, "pi::cast -> use piextCreateDeviceWithNativeHandle"); return {}; } } // namespace pi -} // namespace detail - -// For shortness of using PI from the top-level sycl files. -namespace RT = cl::sycl::detail::pi; - -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/detail/cuda_definitions.hpp b/sycl/piapi/include/pi/pi_cuda.h similarity index 92% rename from sycl/include/CL/sycl/detail/cuda_definitions.hpp rename to sycl/piapi/include/pi/pi_cuda.h index 9af7dbdab0152..701325e7b4717 100644 --- a/sycl/include/CL/sycl/detail/cuda_definitions.hpp +++ b/sycl/piapi/include/pi/pi_cuda.h @@ -1,4 +1,4 @@ -//==------------ cuda_definitions.hpp - SYCL CUDA backend ------------------==// +//==------------ pi_cuda.h - PI CUDA backend ------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/sycl/source/detail/plugin.hpp b/sycl/piapi/include/pi/plugin.hpp similarity index 63% rename from sycl/source/detail/plugin.hpp rename to sycl/piapi/include/pi/plugin.hpp index 756047f3adc4e..d3dead22d57a6 100644 --- a/sycl/source/detail/plugin.hpp +++ b/sycl/piapi/include/pi/plugin.hpp @@ -7,22 +7,29 @@ //===----------------------------------------------------------------------===// #pragma once -#include -#include -#include -#include + +#include #ifdef XPTI_ENABLE_INSTRUMENTATION // Include the headers necessary for emitting traces using the trace framework #include "xpti_trace_framework.h" #endif -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -extern xpti::trace_event_data_t *GPICallEvent; -#endif +#define PI_CHECK_OCL_CODE_THROW_HELPER(X, EXC) \ + if (X != 0) { \ + throw EXC; \ + } + +#ifdef PI_DPCPP_INTEGRATION +#include +#else +#ifndef __SYCL_CHECK_OCL_CODE_THROW +#define __SYCL_CHECK_OCL_CODE_THROW(X, EXC) \ + PI_CHECK_OCL_CODE_THROW_HELPER(X, EXC{}) +#endif // __SYCL_CHECK_OCL_CODE_THROW +#endif // PI_DPCPP_INTEGRATION + +namespace pi { /// The plugin class provides a unified interface to the underlying low-level /// runtimes for the device-agnostic SYCL runtime. /// @@ -31,7 +38,7 @@ class plugin { public: plugin() = delete; - plugin(RT::PiPlugin Plugin, backend UseBackend) + plugin(pi::PiPlugin Plugin, backend UseBackend) : MPlugin(Plugin), MBackend(UseBackend) {} plugin &operator=(const plugin &) = default; @@ -41,14 +48,14 @@ class plugin { ~plugin() = default; - const RT::PiPlugin &getPiPlugin() const { return MPlugin; } - RT::PiPlugin &getPiPlugin() { return MPlugin; } + const pi::PiPlugin &getPiPlugin() const { return MPlugin; } + pi::PiPlugin &getPiPlugin() { return MPlugin; } /// Checks return value from PI calls. /// /// \throw Exception if pi_result is not a PI_SUCCESS. - template - void checkPiResult(RT::PiResult pi_result) const { + template + void checkPiResult(pi::PiResult pi_result) const { __SYCL_CHECK_OCL_CODE_THROW(pi_result, Exception); } @@ -63,8 +70,8 @@ class plugin { /// /// \sa plugin::checkPiResult template - RT::PiResult call_nocheck(ArgsT... Args) const { - RT::PiFuncInfo PiCallInfo; + pi::PiResult call_nocheck(ArgsT... Args) const { + pi::PiFuncInfo PiCallInfo; #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit a function_begin trace for the PI API before the call is executed. // If arguments need to be captured, then a data structure can be sent in @@ -75,12 +82,12 @@ class plugin { if (pi::trace(pi::TraceLevel::PI_TRACE_CALLS)) { std::string FnName = PiCallInfo.getFuncName(); std::cout << "---> " << FnName << "(" << std::endl; - RT::printArgs(Args...); + pi::printArgs(Args...); } - RT::PiResult R = PiCallInfo.getFuncPtr(MPlugin)(Args...); + pi::PiResult R = PiCallInfo.getFuncPtr(MPlugin)(Args...); if (pi::trace(pi::TraceLevel::PI_TRACE_CALLS)) { std::cout << ") ---> "; - RT::printArgs(R); + pi::printArgs(R); } #ifdef XPTI_ENABLE_INSTRUMENTATION // Close the function begin with a call to function end @@ -94,16 +101,27 @@ class plugin { /// \throw cl::sycl::runtime_exception if the call was not successful. template void call(ArgsT... Args) const { - RT::PiResult Err = call_nocheck(Args...); + pi::PiResult Err = call_nocheck(Args...); checkPiResult(Err); } backend getBackend(void) const { return MBackend; } private: - RT::PiPlugin MPlugin; + pi::PiPlugin MPlugin; backend MBackend; }; // class plugin -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) + +template <> +inline void +plugin::checkPiResult(pi::PiResult pi_result) const { + PI_CHECK_OCL_CODE_THROW_HELPER(pi_result, + std::runtime_error{"Invalid PIAPI call"}); +} + +// Holds the PluginInformation for the plugin that is bound. +// Currently a global variable is used to store OpenCL plugin information to be +// used with SYCL Interoperability Constructors. +extern std::shared_ptr GlobalPlugin; + +} // namespace pi diff --git a/sycl/piapi/piapiConfig.cmake.in b/sycl/piapi/piapiConfig.cmake.in new file mode 100644 index 0000000000000..7d7a67bdad996 --- /dev/null +++ b/sycl/piapi/piapiConfig.cmake.in @@ -0,0 +1,6 @@ +include(CMakeFindDependencyMacro) + +find_dependency(CUDA 10.0) +find_dependency(OpenCL) + +include(${CMAKE_CURRENT_LIST_DIR}/piapiTargets.cmake) diff --git a/sycl/piapi/plugins/CMakeLists.txt b/sycl/piapi/plugins/CMakeLists.txt new file mode 100644 index 0000000000000..555922d74513d --- /dev/null +++ b/sycl/piapi/plugins/CMakeLists.txt @@ -0,0 +1,34 @@ +set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + +add_executable(test_pi_plugins EXCLUDE_FROM_ALL + "test.cpp" +) +target_link_libraries(test_pi_plugins PRIVATE + piapi +) +add_test(NAME pi_plugins COMMAND test_pi_plugins) + +# The test assumes pi_opencl is always available +if(WIN32) + set_tests_properties(pi_plugins PROPERTIES + ENVIRONMENT "PATH=$;$ENV{PATH}" + ) +else() + set_target_properties(test_pi_plugins PROPERTIES + INSTALL_RPATH "$" + BUILD_WITH_INSTALL_RPATH ON + ) +endif() + +add_subdirectory(opencl) +add_dependencies(test_pi_plugins pi_opencl) + +if(PI_BUILD_CUDA) + add_subdirectory(cuda) + add_dependencies(test_pi_plugins pi_cuda) +endif() + +if(PI_BUILD_LEVEL_ZERO) + add_subdirectory(level_zero) + add_dependencies(test_pi_plugins pi_level_zero) +endif () diff --git a/sycl/piapi/plugins/cuda/CMakeLists.txt b/sycl/piapi/plugins/cuda/CMakeLists.txt new file mode 100644 index 0000000000000..12d30efa7ca0b --- /dev/null +++ b/sycl/piapi/plugins/cuda/CMakeLists.txt @@ -0,0 +1,53 @@ +message(STATUS "Including the PI API CUDA backend.") + + # cannot rely on cmake support for CUDA; it assumes runtime API is being used. + # we only require the CUDA driver API to be used + # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library. + +find_package(CUDA 10.1 REQUIRED) + +# Make imported library global to use it within the project. +add_library(cudadrv SHARED IMPORTED GLOBAL) + +set_target_properties(cudadrv PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} +) + +if(WIN32) + set_target_properties(cudadrv PROPERTIES + IMPORTED_IMPLIB ${CUDA_CUDA_LIBRARY} + ) +else() + set_target_properties(cudadrv PROPERTIES + IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} + ) +endif() + +add_library(pi_cuda SHARED + "${pi_include_dir}/pi/pi.h" + "${pi_include_dir}/pi/pi.hpp" + "pi_cuda.hpp" + "pi_cuda.cpp" +) +set_target_properties(pi_cuda PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" + LIBRARY_OUTPUT_DIRECTORY "${pi_library_output_dir}" + RUNTIME_OUTPUT_DIRECTORY "${pi_binary_output_dir}" +) + +target_link_libraries(pi_cuda PUBLIC piapi cudadrv) +target_link_libraries(pi_cuda PRIVATE pi_export_library) + +target_include_directories(pi_cuda PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") + +target_include_directories(pi_cuda PUBLIC ${CUDA_INCLUDE_DIRS}) + +install(TARGETS pi_cuda + EXPORT piapiTargets + LIBRARY DESTINATION "lib${PI_LIBDIR_SUFFIX}" COMPONENT pi_cuda + RUNTIME DESTINATION "bin" COMPONENT pi_cuda +) +export(TARGETS pi_cuda APPEND + FILE "${PROJECT_BINARY_DIR}/piapiTargets.cmake" +) +install(FILES "pi_cuda.hpp" DESTINATION include/pi COMPONENT headers) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/piapi/plugins/cuda/pi_cuda.cpp similarity index 93% rename from sycl/plugins/cuda/pi_cuda.cpp rename to sycl/piapi/plugins/cuda/pi_cuda.cpp index 4bcf33403a55f..c4180b3805a3d 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/piapi/plugins/cuda/pi_cuda.cpp @@ -11,10 +11,9 @@ /// /// \ingroup sycl_pi_cuda -#include -#include -#include -#include +#include "pi_cuda.hpp" +#include +#include #include #include @@ -25,6 +24,12 @@ #include #include +#if defined(__builtin_unreachable) +#define PI_BUILTIN_UNREACHABLE() __builtin_unreachable() +#else +#define PI_BUILTIN_UNREACHABLE() +#endif // defined(__builtin_unreachable) + namespace { std::string getCudaVersionString() { int driver_version = 0; @@ -238,8 +243,8 @@ pi_result getInfo(size_t param_value_size, void *param_value, int getAttribute(pi_device device, CUdevice_attribute attribute) { int value; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&value, attribute, device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&value, attribute, device->get()) == + CUDA_SUCCESS); return value; } /// \endcond @@ -275,9 +280,6 @@ void guessLocalWorkSize(int *threadsPerBlock, const size_t *global_work_size, } // anonymous namespace /// ------ Error handling, matching OpenCL plugin semantics. -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { namespace pi { // Report error and no return (keeps compiler from printing warnings). @@ -295,9 +297,6 @@ void assertion(bool Condition, const char *Message) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) //-------------- // PI object implementation @@ -406,7 +405,7 @@ pi_result _pi_event::record() { try { eventId_ = queue_->get_next_event_id(); if (eventId_ == 0) { - cl::sycl::detail::pi::die( + pi::die( "Unrecoverable program state reached in event identifier overflow"); } result = PI_CHECK_ERROR(cuEventRecord(evEnd_, cuStream)); @@ -582,8 +581,7 @@ template class ReleaseGuard { // CUDA error for which it is unclear if the function that reported it // succeeded or not. Either way, the state of the program is compromised // and likely unrecoverable. - cl::sycl::detail::pi::die( - "Unrecoverable program state reached in cuda_piMemRelease"); + pi::die("Unrecoverable program state reached in cuda_piMemRelease"); } } } @@ -617,10 +615,10 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, static pi_uint32 numPlatforms = 1; static _pi_platform platformId; - if (num_entries == 0 and platforms != nullptr) { + if ((num_entries == 0) && (platforms != nullptr)) { return PI_INVALID_VALUE; } - if (platforms == nullptr and num_platforms == nullptr) { + if ((platforms == nullptr) && (num_platforms == nullptr)) { return PI_INVALID_VALUE; } @@ -657,7 +655,7 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, throw; } }, - err); + std::ref(err)); if (num_platforms != nullptr) { *num_platforms = numPlatforms; @@ -700,9 +698,9 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform, return getInfo(param_value_size, param_value, param_value_size_ret, ""); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Platform info request not implemented"); + pi::die("Platform info request not implemented"); return {}; } @@ -759,7 +757,7 @@ pi_result cuda_piContextGetInfo(pi_context context, pi_context_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, context->get_reference_count()); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } return PI_OUT_OF_RESOURCES; @@ -795,10 +793,10 @@ pi_result cuda_piextDeviceSelectBinary(pi_device device, pi_uint32 num_binaries, pi_uint32 *selected_binary) { if (!binaries) { - cl::sycl::detail::pi::die("No list of device images provided"); + pi::die("No list of device images provided"); } if (num_binaries < 1) { - cl::sycl::detail::pi::die("No binary images in the list"); + pi::die("No binary images in the list"); } // Look for an image for the NVPTX64 target, and return the first one that is @@ -819,8 +817,7 @@ pi_result cuda_piextGetDeviceFunctionPointer(pi_device device, pi_program program, const char *function_name, pi_uint64 *function_pointer_ret) { - cl::sycl::detail::pi::die( - "cuda_piextGetDeviceFunctionPointer not implemented"); + pi::die("cuda_piextGetDeviceFunctionPointer not implemented"); return {}; } @@ -846,11 +843,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_COMPUTE_UNITS: { int compute_units = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&compute_units, - CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(compute_units >= 0); + pi::assertion(cuDeviceGetAttribute(&compute_units, + CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + device->get()) == CUDA_SUCCESS); + pi::assertion(compute_units >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(compute_units)); } @@ -862,20 +858,20 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, size_t return_sizes[max_work_item_dimensions]; int max_x = 0, max_y = 0, max_z = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&max_x, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_x >= 0); + pi::assertion(cuDeviceGetAttribute(&max_x, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, + device->get()) == CUDA_SUCCESS); + pi::assertion(max_x >= 0); - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&max_y, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_y >= 0); + pi::assertion(cuDeviceGetAttribute(&max_y, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, + device->get()) == CUDA_SUCCESS); + pi::assertion(max_y >= 0); - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&max_z, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_z >= 0); + pi::assertion(cuDeviceGetAttribute(&max_z, + CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, + device->get()) == CUDA_SUCCESS); + pi::assertion(max_z >= 0); return_sizes[0] = size_t(max_x); return_sizes[1] = size_t(max_y); @@ -885,12 +881,12 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: { int max_work_group_size = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&max_work_group_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(max_work_group_size >= 0); + pi::assertion(max_work_group_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, size_t(max_work_group_size)); @@ -939,10 +935,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY: { int clock_freq = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&clock_freq, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(clock_freq >= 0); + pi::assertion(cuDeviceGetAttribute(&clock_freq, + CU_DEVICE_ATTRIBUTE_CLOCK_RATE, + device->get()) == CUDA_SUCCESS); + pi::assertion(clock_freq >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint32(clock_freq) / 1000u); } @@ -958,8 +954,7 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, // CL_DEVICE_TYPE_CUSTOM. size_t global = 0; - cl::sycl::detail::pi::assertion(cuDeviceTotalMem(&global, device->get()) == - CUDA_SUCCESS); + pi::assertion(cuDeviceTotalMem(&global, device->get()) == CUDA_SUCCESS); auto quarter_global = static_cast(global / 4u); @@ -988,17 +983,16 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int tex_height = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_height, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_height >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&surf_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_height >= 0); + pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1007,17 +1001,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1026,17 +1018,16 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: { // Take the smaller of maximum surface and maximum texture height. int tex_height = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_height, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_height >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_height >= 0); int surf_height = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&surf_height, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_height >= 0); + pi::assertion(surf_height >= 0); int min = std::min(tex_height, surf_height); @@ -1045,17 +1036,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1064,17 +1053,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: { // Take the smaller of maximum surface and maximum texture depth. int tex_depth = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_depth, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_depth >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_depth, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_depth >= 0); int surf_depth = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_depth, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_depth >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_depth, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_depth >= 0); int min = std::min(tex_depth, surf_depth); @@ -1083,17 +1070,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: { // Take the smaller of maximum surface and maximum texture width. int tex_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&tex_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(tex_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &tex_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(tex_width >= 0); int surf_width = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&surf_width, - CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(surf_width >= 0); + pi::assertion(cuDeviceGetAttribute( + &surf_width, CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, + device->get()) == CUDA_SUCCESS); + pi::assertion(surf_width >= 0); int min = std::min(tex_width, surf_width); @@ -1117,10 +1102,9 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: { int mem_base_addr_align = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&mem_base_addr_align, - CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&mem_base_addr_align, + CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, + device->get()) == CUDA_SUCCESS); // Multiply by 8 as clGetDeviceInfo returns this value in bits mem_base_addr_align *= 8; return getInfo(param_value_size, param_value, param_value_size_ret, @@ -1155,10 +1139,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE: { int cache_size = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&cache_size, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, - device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(cache_size >= 0); + pi::assertion(cuDeviceGetAttribute(&cache_size, + CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, + device->get()) == CUDA_SUCCESS); + pi::assertion(cache_size >= 0); // The L2 cache is global to the GPU. return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(cache_size)); @@ -1166,18 +1150,17 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_GLOBAL_MEM_SIZE: { size_t bytes = 0; // Runtime API has easy access to this value, driver API info is scarse. - cl::sycl::detail::pi::assertion(cuDeviceTotalMem(&bytes, device->get()) == - CUDA_SUCCESS); + pi::assertion(cuDeviceTotalMem(&bytes, device->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64{bytes}); } case PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE: { int constant_memory = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&constant_memory, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(constant_memory >= 0); + pi::assertion(constant_memory >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(constant_memory)); @@ -1197,32 +1180,31 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, // CUDA has its own definition of "local memory", which maps to OpenCL's // "private memory". int local_mem_size = 0; - cl::sycl::detail::pi::assertion( + pi::assertion( cuDeviceGetAttribute(&local_mem_size, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion(local_mem_size >= 0); + pi::assertion(local_mem_size >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(local_mem_size)); } case PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: { int ecc_enabled = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&ecc_enabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&ecc_enabled, + CU_DEVICE_ATTRIBUTE_ECC_ENABLED, + device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); + pi::assertion((ecc_enabled == 0) | (ecc_enabled == 1)); auto result = static_cast(ecc_enabled); return getInfo(param_value_size, param_value, param_value_size_ret, result); } case PI_DEVICE_INFO_HOST_UNIFIED_MEMORY: { int is_integrated = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&is_integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&is_integrated, + CU_DEVICE_ATTRIBUTE_INTEGRATED, + device->get()) == CUDA_SUCCESS); - cl::sycl::detail::pi::assertion((is_integrated == 0) | - (is_integrated == 1)); + pi::assertion((is_integrated == 0) | (is_integrated == 1)); auto result = static_cast(is_integrated); return getInfo(param_value_size, param_value, param_value_size_ret, result); } @@ -1274,9 +1256,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, case PI_DEVICE_INFO_NAME: { static constexpr size_t MAX_DEVICE_NAME_LENGTH = 256u; char name[MAX_DEVICE_NAME_LENGTH]; - cl::sycl::detail::pi::assertion( - cuDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, device->get()) == - CUDA_SUCCESS); + pi::assertion(cuDeviceGetName(name, MAX_DEVICE_NAME_LENGTH, + device->get()) == CUDA_SUCCESS); return getInfoArray(strlen(name) + 1, param_value_size, param_value, param_value_size_ret, name); } @@ -1453,9 +1434,9 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Device info request not implemented"); + pi::die("Device info request not implemented"); return {}; } @@ -1483,8 +1464,7 @@ pi_result cuda_piextDeviceGetNativeHandle(pi_device device, pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, pi_platform platform, pi_device *device) { - cl::sycl::detail::pi::die( - "Creation of PI device from native handle not implemented"); + pi::die("Creation of PI device from native handle not implemented"); return {}; } @@ -1646,8 +1626,7 @@ pi_result cuda_piextContextGetNativeHandle(pi_context context, /// \return TBD pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context *context) { - cl::sycl::detail::pi::die( - "Creation of PI context from native handle not implemented"); + pi::die("Creation of PI context from native handle not implemented"); return {}; } @@ -1783,8 +1762,7 @@ pi_result cuda_piMemRelease(pi_mem memObj) { // error for which it is unclear if the function that reported it succeeded // or not. Either way, the state of the program is compromised and likely // unrecoverable. - cl::sycl::detail::pi::die( - "Unrecoverable program state reached in cuda_piMemRelease"); + pi::die("Unrecoverable program state reached in cuda_piMemRelease"); } return PI_SUCCESS; @@ -1862,7 +1840,7 @@ pi_result cuda_piMemGetInfo(pi_mem memObj, cl_mem_info queriedInfo, size_t expectedQuerySize, void *queryOutput, size_t *writtenQuerySize) { - cl::sycl::detail::pi::die("cuda_piMemGetInfo not implemented"); + pi::die("cuda_piMemGetInfo not implemented"); } /// Gets the native CUDA handle of a PI mem object @@ -1887,8 +1865,7 @@ pi_result cuda_piextMemGetNativeHandle(pi_mem mem, /// \return TBD pi_result cuda_piextMemCreateWithNativeHandle(pi_native_handle nativeHandle, pi_mem *mem) { - cl::sycl::detail::pi::die( - "Creation of PI mem from native handle not implemented"); + pi::die("Creation of PI mem from native handle not implemented"); return {}; } @@ -1963,9 +1940,9 @@ pi_result cuda_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, command_queue->properties_); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Queue info request not implemented"); + pi::die("Queue info request not implemented"); return {}; } @@ -2049,8 +2026,7 @@ pi_result cuda_piextQueueGetNativeHandle(pi_queue queue, pi_result cuda_piextQueueCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, pi_queue *queue) { - cl::sycl::detail::pi::die( - "Creation of PI queue from native handle not implemented"); + pi::die("Creation of PI queue from native handle not implemented"); return {}; } @@ -2256,9 +2232,8 @@ pi_result cuda_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, arrayDesc.Format != CU_AD_FORMAT_SIGNED_INT32 && arrayDesc.Format != CU_AD_FORMAT_HALF && arrayDesc.Format != CU_AD_FORMAT_FLOAT) { - cl::sycl::detail::pi::die( - "PI CUDA kernels only support images with channel types int32, " - "uint32, float, and half."); + pi::die("PI CUDA kernels only support images with channel types int32, " + "uint32, float, and half."); } CUsurfObject cuSurf = arg_mem->mem_.surface_mem_.get_surface(); kernel->set_kernel_arg(arg_index, sizeof(cuSurf), (void *)&cuSurf); @@ -2419,7 +2394,7 @@ pi_result cuda_piEnqueueNativeKernel( pi_uint32 num_mem_objects, const pi_mem *mem_list, const void **args_mem_loc, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - cl::sycl::detail::pi::die("Not implemented in CUDA backend"); + pi::die("Not implemented in CUDA backend"); return {}; } @@ -2438,8 +2413,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, // TODO: check SYCL CTS and spec. May also have to support BGRA if (image_format->image_channel_order != pi_image_channel_order::PI_IMAGE_CHANNEL_ORDER_RGBA) { - cl::sycl::detail::pi::die( - "cuda_piMemImageCreate only supports RGBA channel order"); + pi::die("cuda_piMemImageCreate only supports RGBA channel order"); } // We have to use cuArray3DCreate, which has some caveats. The height and @@ -2499,8 +2473,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, pixel_type_size_bytes = 4; break; default: - cl::sycl::detail::pi::die( - "cuda_piMemImageCreate given unsupported image_channel_data_type"); + pi::die("cuda_piMemImageCreate given unsupported image_channel_data_type"); } // When a dimension isn't used image_desc has the size set to 1 @@ -2583,7 +2556,7 @@ pi_result cuda_piMemImageCreate(pi_context context, pi_mem_flags flags, pi_result cuda_piMemImageGetInfo(pi_mem image, pi_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - cl::sycl::detail::pi::die("cuda_piMemImageGetInfo not implemented"); + pi::die("cuda_piMemImageGetInfo not implemented"); return {}; } @@ -2601,7 +2574,7 @@ pi_result cuda_piclProgramCreateWithSource(pi_context context, pi_uint32 count, const char **strings, const size_t *lengths, pi_program *program) { - cl::sycl::detail::pi::die("cuda_piclProgramCreateWithSource not implemented"); + pi::die("cuda_piclProgramCreateWithSource not implemented"); return {}; } @@ -2636,7 +2609,7 @@ pi_result cuda_piProgramBuild(pi_program program, pi_uint32 num_devices, /// \TODO Not implemented pi_result cuda_piProgramCreate(pi_context context, const void *il, size_t length, pi_program *res_program) { - cl::sycl::detail::pi::die("cuda_piProgramCreate not implemented"); + pi::die("cuda_piProgramCreate not implemented"); return {}; } @@ -2708,9 +2681,9 @@ pi_result cuda_piProgramGetInfo(pi_program program, pi_program_info param_name, getKernelNames(program).c_str()); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program info request not implemented"); + pi::die("Program info request not implemented"); return {}; } @@ -2825,9 +2798,9 @@ pi_result cuda_piProgramGetBuildInfo(pi_program program, pi_device device, return getInfoArray(program->MAX_LOG_SIZE, param_value_size, param_value, param_value_size_ret, program->infoLog_); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Program Build info request not implemented"); + pi::die("Program Build info request not implemented"); return {}; } @@ -2894,8 +2867,7 @@ pi_result cuda_piextProgramGetNativeHandle(pi_program program, pi_result cuda_piextProgramCreateWithNativeHandle(pi_native_handle nativeHandle, pi_context context, pi_program *program) { - cl::sycl::detail::pi::die( - "Creation of PI program from native handle not implemented"); + pi::die("Creation of PI program from native handle not implemented"); return {}; } @@ -2927,7 +2899,7 @@ pi_result cuda_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, ""); } default: { - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } } } @@ -2947,10 +2919,9 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, switch (param_name) { case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { int max_threads = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&max_threads, - CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, - kernel->get()) == CUDA_SUCCESS); + pi::assertion(cuFuncGetAttribute(&max_threads, + CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, + kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, size_t(max_threads)); } @@ -2968,32 +2939,32 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { // OpenCL LOCAL == CUDA SHARED int bytes = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, - kernel->get()) == CUDA_SUCCESS); + pi::assertion(cuFuncGetAttribute(&bytes, + CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, + kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { // Work groups should be multiples of the warp size int warpSize = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, - device->get()) == CUDA_SUCCESS); + pi::assertion(cuDeviceGetAttribute(&warpSize, + CU_DEVICE_ATTRIBUTE_WARP_SIZE, + device->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, static_cast(warpSize)); } case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { // OpenCL PRIVATE == CUDA LOCAL int bytes = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, - kernel->get()) == CUDA_SUCCESS); + pi::assertion(cuFuncGetAttribute(&bytes, + CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, + kernel->get()) == CUDA_SUCCESS); return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } } @@ -3006,7 +2977,7 @@ pi_result cuda_piKernelGetSubGroupInfo( pi_kernel kernel, pi_device device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - cl::sycl::detail::pi::die("cuda_piKernelGetSubGroupInfo not implemented"); + pi::die("cuda_piKernelGetSubGroupInfo not implemented"); return {}; } @@ -3055,7 +3026,7 @@ pi_result cuda_piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, // Events // pi_result cuda_piEventCreate(pi_context context, pi_event *event) { - cl::sycl::detail::pi::die("PI Event Create not implemented in CUDA backend"); + pi::die("PI Event Create not implemented in CUDA backend"); } pi_result cuda_piEventGetInfo(pi_event event, pi_event_info param_name, @@ -3081,7 +3052,7 @@ pi_result cuda_piEventGetInfo(pi_event event, pi_event_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, event->get_context()); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } return PI_INVALID_EVENT; @@ -3114,9 +3085,9 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, return getInfo(param_value_size, param_value, param_value_size_ret, event->get_end_time()); default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } - cl::sycl::detail::pi::die("Event Profiling info request not implemented"); + pi::die("Event Profiling info request not implemented"); return {}; } @@ -3124,13 +3095,13 @@ pi_result cuda_piEventSetCallback(pi_event event, pi_int32 command_exec_callback_type, pfn_notify notify, void *user_data) { - cl::sycl::detail::pi::die("Event Callback not implemented in CUDA backend"); + pi::die("Event Callback not implemented in CUDA backend"); return PI_SUCCESS; } pi_result cuda_piEventSetStatus(pi_event event, pi_int32 execution_status) { - cl::sycl::detail::pi::die("Event Set Status not implemented in CUDA backend"); + pi::die("Event Set Status not implemented in CUDA backend"); return PI_INVALID_VALUE; } @@ -3139,9 +3110,8 @@ pi_result cuda_piEventRetain(pi_event event) { const auto refCount = event->increment_reference_count(); - cl::sycl::detail::pi::assertion( - refCount != 0, - "Reference count overflow detected in cuda_piEventRetain."); + pi::assertion(refCount != 0, + "Reference count overflow detected in cuda_piEventRetain."); return PI_SUCCESS; } @@ -3151,9 +3121,8 @@ pi_result cuda_piEventRelease(pi_event event) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - cl::sycl::detail::pi::assertion( - event->get_reference_count() != 0, - "Reference count overflow detected in cuda_piEventRelease."); + pi::assertion(event->get_reference_count() != 0, + "Reference count overflow detected in cuda_piEventRelease."); // decrement ref count. If it is 0, delete the event. if (event->decrement_reference_count() == 0) { @@ -3233,8 +3202,7 @@ pi_result cuda_piextEventGetNativeHandle(pi_event event, /// \return TBD pi_result cuda_piextEventCreateWithNativeHandle(pi_native_handle nativeHandle, pi_event *event) { - cl::sycl::detail::pi::die( - "Creation of PI event from native handle not implemented"); + pi::die("Creation of PI event from native handle not implemented"); return {}; } @@ -3334,7 +3302,7 @@ pi_result cuda_piSamplerGetInfo(pi_sampler sampler, cl_sampler_info param_name, addressing_prop); } default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } return {}; } @@ -3361,9 +3329,8 @@ pi_result cuda_piSamplerRelease(pi_sampler sampler) { // double delete or someone is messing with the ref count. // either way, cannot safely proceed. - cl::sycl::detail::pi::assertion( - sampler->get_reference_count() != 0, - "Reference count overflow detected in cuda_piSamplerRelease."); + pi::assertion(sampler->get_reference_count() != 0, + "Reference count overflow detected in cuda_piSamplerRelease."); // decrement ref count. If it is 0, delete the sampler. if (sampler->decrement_reference_count() == 0) { @@ -3742,7 +3709,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR array_desc) { case CU_AD_FORMAT_FLOAT: return 4; } - cl::sycl::detail::pi::die("Invalid iamge format."); + pi::die("Invalid image format."); return 0; } @@ -4025,7 +3992,7 @@ pi_result cuda_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - cl::sycl::detail::pi::die("cuda_piEnqueueMemImageFill not implemented"); + pi::die("cuda_piEnqueueMemImageFill not implemented"); return {}; } @@ -4218,7 +4185,7 @@ pi_result cuda_piextUSMFree(pi_context context, void *ptr) { unsigned int type; result = PI_CHECK_ERROR(cuPointerGetAttribute( &type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)ptr)); - assert(type == CU_MEMORYTYPE_DEVICE or type == CU_MEMORYTYPE_HOST); + assert((type == CU_MEMORYTYPE_DEVICE) || (type == CU_MEMORYTYPE_HOST)); if (type == CU_MEMORYTYPE_DEVICE) { result = PI_CHECK_ERROR(cuMemFree((CUdeviceptr)ptr)); } @@ -4397,7 +4364,7 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, } result = PI_CHECK_ERROR(cuPointerGetAttribute( &value, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)ptr)); - assert(value == CU_MEMORYTYPE_DEVICE or value == CU_MEMORYTYPE_HOST); + assert((value == CU_MEMORYTYPE_DEVICE) || (value == CU_MEMORYTYPE_HOST)); if (value == CU_MEMORYTYPE_DEVICE) { // pointer to device memory return getInfo(param_value_size, param_value, param_value_size_ret, @@ -4409,7 +4376,7 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr, PI_MEM_TYPE_HOST); } // should never get here - __builtin_unreachable(); + PI_BUILTIN_UNREACHABLE(); return getInfo(param_value_size, param_value, param_value_size_ret, PI_MEM_TYPE_UNKNOWN); } diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/piapi/plugins/cuda/pi_cuda.hpp similarity index 99% rename from sycl/plugins/cuda/pi_cuda.hpp rename to sycl/piapi/plugins/cuda/pi_cuda.hpp index 15de78d44bff2..60e1717bc2520 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/piapi/plugins/cuda/pi_cuda.hpp @@ -18,19 +18,28 @@ #ifndef PI_CUDA_HPP #define PI_CUDA_HPP -#include "CL/sycl/detail/pi.h" +#include + #include #include #include #include #include +#include #include +#include +#include #include +#include #include #include #include -#include -#include + +#if defined(__GNUC__) && !defined(__llvm__) && (__GNUC__ < 7) +namespace std { +using atomic_uint32_t = std::atomic; +} +#endif // GCC pre 7 extern "C" { diff --git a/sycl/piapi/plugins/level_zero/CMakeLists.txt b/sycl/piapi/plugins/level_zero/CMakeLists.txt new file mode 100755 index 0000000000000..e2afde38d296d --- /dev/null +++ b/sycl/piapi/plugins/level_zero/CMakeLists.txt @@ -0,0 +1,109 @@ +# PI Level Zero plugin library + +set(LEVEL_ZERO_LOADER + "${pi_library_output_dir}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader") +if(MSVC) + set(LEVEL_ZERO_LOADER "${LEVEL_ZERO_LOADER}${CMAKE_STATIC_LIBRARY_SUFFIX}") +else() + set(LEVEL_ZERO_LOADER "${LEVEL_ZERO_LOADER}${CMAKE_SHARED_LIBRARY_SUFFIX}") +endif() + +if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) + message(STATUS "Download Level Zero loader and headers from github.com") + if (CMAKE_C_COMPILER) + list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) + endif() + if (CMAKE_CXX_COMPILER) + list(APPEND AUX_CMAKE_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) + endif() + + set(level_zero_loader_binary_dir "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build") + file(MAKE_DIRECTORY "${level_zero_loader_binary_dir}") + + set(level_zero_loader_install_dir "${pi_library_base_dir}") + + set(LEVEL_ZERO_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/Level0/level_zero_loader") + if (NOT DEFINED SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE) + set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE}) + endif() + + ExternalProject_Add(level-zero-loader + GIT_REPOSITORY https://github.com/oneapi-src/level-zero.git + GIT_TAG v1.0 + UPDATE_DISCONNECTED ${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE} + SOURCE_DIR ${LEVEL_ZERO_LOADER_SOURCE_DIR} + BINARY_DIR "${level_zero_loader_binary_dir}" + INSTALL_DIR "${level_zero_loader_install_dir}" + CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} + -DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS} + -DCMAKE_INSTALL_PREFIX= + -DCMAKE_INSTALL_LIBDIR:PATH=lib${PI_LIBDIR_SUFFIX} + LOG_DOWNLOAD 1 + LOG_UPDATE 1 + LOG_CONFIGURE 1 + LOG_BUILD 1 + LOG_INSTALL 1 + ${AUX_CMAKE_FLAGS} + STEP_TARGETS configure,build,install + BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER} + ) + set(LEVEL_ZERO_INCLUDE_DIR "${level_zero_loader_install_dir}/include") + file(MAKE_DIRECTORY "${LEVEL_ZERO_INCLUDE_DIR}") +else() + include_directories("${LEVEL_ZERO_INCLUDE_DIR}") + file(GLOB LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}*") + file(COPY ${LEVEL_ZERO_LIBRARY_SRC} DESTINATION ${pi_library_output_dir}) + add_custom_target(level-zero-loader DEPENDS ${LEVEL_ZERO_LIBRARY} COMMENT "Copying Level Zero Loader ...") +endif() + +add_library(level_zero_lib INTERFACE IMPORTED GLOBAL) +set_target_properties(level_zero_lib PROPERTIES + INTERFACE_LINK_LIBRARIES ${LEVEL_ZERO_LOADER} + INTERFACE_INCLUDE_DIRECTORIES "${LEVEL_ZERO_INCLUDE_DIR}" +) +add_dependencies(level_zero_lib level-zero-loader) + +add_library(pi_level_zero SHARED + "${pi_include_dir}/pi/pi.h" + "pi_level_zero.cpp" + "pi_level_zero.hpp" + "usm_allocator.cpp" + "usm_allocator.hpp" +) + +set_target_properties(pi_level_zero PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" + LIBRARY_OUTPUT_DIRECTORY "${pi_library_output_dir}" + RUNTIME_OUTPUT_DIRECTORY "${pi_binary_output_dir}" +) + +target_link_libraries(pi_level_zero PUBLIC piapi) +target_link_libraries(pi_level_zero PRIVATE + level_zero_lib + pi_export_library +) +if (UNIX) + target_link_libraries(pi_level_zero PRIVATE pthread) +endif() + +################################################################################ +## install + +install(TARGETS pi_level_zero + EXPORT piapiTargets + LIBRARY DESTINATION "lib${PI_LIBDIR_SUFFIX}" COMPONENT pi_level_zero + RUNTIME DESTINATION "bin" COMPONENT pi_level_zero) +export(TARGETS pi_level_zero APPEND + FILE "${PROJECT_BINARY_DIR}/piapiTargets.cmake" +) +install(FILES "pi_level_zero.hpp" DESTINATION include/pi COMPONENT headers) +install(DIRECTORY "${LEVEL_ZERO_INCLUDE_DIR}/" + DESTINATION "include" + COMPONENT headers +) +install(DIRECTORY "${pi_library_output_dir}" + DESTINATION . + COMPONENT pi_level_zero + FILES_MATCHING PATTERN "${CMAKE_STATIC_LIBRARY_PREFIX}ze*" +) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/piapi/plugins/level_zero/pi_level_zero.cpp similarity index 99% rename from sycl/plugins/level_zero/pi_level_zero.cpp rename to sycl/piapi/plugins/level_zero/pi_level_zero.cpp index d8b35467d545f..d0d8a21ea9f57 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/piapi/plugins/level_zero/pi_level_zero.cpp @@ -2828,7 +2828,7 @@ zeModuleDynamicLinkMock(uint32_t numModules, ze_module_handle_t *phModules, // is only a single module that is "linked" to itself. There is nothing to // do in this degenerate case. if (numModules > 1) { - die("piProgramLink: Program Linking is not supported yet in Level0"); + die("piProgramLink: Program Linking is not supported yet in Level Zero"); } // The mock does not support the link log. @@ -5107,7 +5107,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { #define _PI_API(api) \ (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); -#include +#include return PI_SUCCESS; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/piapi/plugins/level_zero/pi_level_zero.hpp similarity index 99% rename from sycl/plugins/level_zero/pi_level_zero.hpp rename to sycl/piapi/plugins/level_zero/pi_level_zero.hpp index c552df2556fb3..93d0659ead407 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/piapi/plugins/level_zero/pi_level_zero.hpp @@ -18,7 +18,6 @@ #ifndef PI_LEVEL_ZERO_HPP #define PI_LEVEL_ZERO_HPP -#include #include #include #include @@ -27,13 +26,14 @@ #include #include #include +#include #include #include -#include - #include "usm_allocator.hpp" +#include + template To pi_cast(From Value) { // TODO: see if more sanity checks are possible. assert(sizeof(From) == sizeof(To)); diff --git a/sycl/plugins/level_zero/usm_allocator.cpp b/sycl/piapi/plugins/level_zero/usm_allocator.cpp similarity index 100% rename from sycl/plugins/level_zero/usm_allocator.cpp rename to sycl/piapi/plugins/level_zero/usm_allocator.cpp diff --git a/sycl/plugins/level_zero/usm_allocator.hpp b/sycl/piapi/plugins/level_zero/usm_allocator.hpp similarity index 100% rename from sycl/plugins/level_zero/usm_allocator.hpp rename to sycl/piapi/plugins/level_zero/usm_allocator.hpp diff --git a/sycl/piapi/plugins/opencl/CMakeLists.txt b/sycl/piapi/plugins/opencl/CMakeLists.txt new file mode 100644 index 0000000000000..584f0aa39225a --- /dev/null +++ b/sycl/piapi/plugins/opencl/CMakeLists.txt @@ -0,0 +1,36 @@ +#TODO: +#1. Figure out why CMP0057 has to be set. Should have been taken care of earlier in the build +#2. Use AddLLVM to modify the build and access config options +#cmake_policy(SET CMP0057 NEW) +#include(AddLLVM) + +find_package(OpenCL REQUIRED) + +# Plugin for OpenCL +# Create Shared library for libpi_opencl.so. +#TODO: remove dependency on pi.hpp in sycl project. +#TODO: Currently, the pi.hpp header is common between sycl and plugin library sources. +#This can be changed by copying the pi.hpp file in the plugins project. + +add_library(pi_opencl SHARED + "${pi_include_dir}/pi/pi.h" + "pi_opencl.cpp" +) +set_target_properties(pi_opencl PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${pi_library_output_dir}" + LIBRARY_OUTPUT_DIRECTORY "${pi_library_output_dir}" + RUNTIME_OUTPUT_DIRECTORY "${pi_binary_output_dir}" +) + +target_include_directories(pi_opencl PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") +target_link_libraries(pi_opencl PUBLIC piapi OpenCL::OpenCL) +target_link_libraries(pi_opencl PRIVATE pi_export_library) + +install(TARGETS pi_opencl + EXPORT piapiTargets + LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_opencl + RUNTIME DESTINATION "bin" COMPONENT pi_opencl +) +export(TARGETS pi_opencl APPEND + FILE "${PROJECT_BINARY_DIR}/piapiTargets.cmake" +) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/piapi/plugins/opencl/pi_opencl.cpp similarity index 99% rename from sycl/plugins/opencl/pi_opencl.cpp rename to sycl/piapi/plugins/opencl/pi_opencl.cpp index 025437b8ba8a9..a5198470b7c59 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/piapi/plugins/opencl/pi_opencl.cpp @@ -14,8 +14,10 @@ /// /// \ingroup sycl_pi_ocl -#include -#include +#include +#include +#include +#include #include #include diff --git a/sycl/piapi/plugins/test.cpp b/sycl/piapi/plugins/test.cpp new file mode 100644 index 0000000000000..0a8c1cf371276 --- /dev/null +++ b/sycl/piapi/plugins/test.cpp @@ -0,0 +1,74 @@ +//==---------- test.cpp - Test piapi library -------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include + +// Configuration hooks +namespace pi { +namespace config { +TraceLevel trace_level_mask() { return {TraceLevel::PI_TRACE_ALL}; } +pi::backend *backend() { return nullptr; } +pi::device_filter_list *device_filter_list() { return nullptr; } +} // namespace config +} // namespace pi + +std::string platform_info_string(pi::plugin &p, pi_platform plt, + pi_platform_info param) { + std::size_t paramSize = 0; + p.call(plt, param, 0, nullptr, ¶mSize); + + if (paramSize == 0) { + std::cout << "Empty platform name" << std::endl; + return ""; + } + + std::vector platformNameBuffer; + platformNameBuffer.resize(paramSize); + + p.call(plt, param, paramSize, + platformNameBuffer.data(), nullptr); + + return platformNameBuffer.data(); +} + +int main() { + auto plugins = pi::initialize(); + + std::cout << "Num plugins: " << plugins.size() << std::endl; + + for (auto &p : plugins) { + std::cout << "=== plugin" << std::endl; + + pi_uint32 numPlatforms = 0; + p.call(0, nullptr, &numPlatforms); + + std::cout << "Num platforms: " << numPlatforms << std::endl; + + if (numPlatforms == 0) { + continue; + } + + std::vector platforms; + platforms.resize(numPlatforms); + p.call(numPlatforms, platforms.data(), nullptr); + + for (auto &plt : platforms) { + std::cout << "Platform name: " + << platform_info_string(p, plt, PI_PLATFORM_INFO_NAME) + << std::endl; + + std::cout << "Platform version: " + << platform_info_string(p, plt, PI_PLATFORM_INFO_VERSION) + << std::endl; + } + } + + return 0; +} diff --git a/sycl/source/detail/pi.cpp b/sycl/piapi/src/pi.cpp similarity index 87% rename from sycl/source/detail/pi.cpp rename to sycl/piapi/src/pi.cpp index 0ff4081e96ade..baf8de9b8dc80 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/piapi/src/pi.cpp @@ -11,41 +11,30 @@ /// /// \ingroup sycl_pi -#include "context_impl.hpp" -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include #include +#include #include #include #include +#include #include -#include +#include #include +#include +#include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION // Include the headers necessary for emitting // traces using the trace framework #include "xpti_trace_framework.h" -#endif -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -// Global (to the SYCL runtime) graph handle that all command groups are a -// child of -/// Event to be used by graph related activities -xpti_td *GSYCLGraphEvent = nullptr; -/// Event to be used by PI layer related activities -xpti_td *GPICallEvent = nullptr; /// Constants being used as placeholder until one is able to reliably get the /// version of the SYCL runtime constexpr uint32_t GMajVer = 1; @@ -55,9 +44,29 @@ constexpr const char *GVerStr = "sycl 1.0"; namespace pi { -static void initializePlugins(vector_class *Plugins); +#ifdef XPTI_ENABLE_INSTRUMENTATION + +// We define a sycl stream name and this will be used by the instrumentation +// framework +extern const char *SYCL_STREAM_NAME; + +// Global (to the SYCL runtime) graph handle that all command groups are a +// child of +/// Event to be used by graph related activities +extern xpti_td *GSYCLGraphEvent; + +/// Event to be used by PI layer related activities +xpti_td *GPICallEvent = nullptr; + +// Stream name being used for traces generated from the SYCL plugin layer +constexpr const char *SYCL_PICALL_STREAM_NAME = "sycl.pi"; + +#endif // XPTI_ENABLE_INSTRUMENTATION + +static void initializePlugins(std::vector *Plugins); bool XPTIInitDone = false; +std::shared_ptr GlobalPlugin; // Implementation of the SYCL PI API call tracing methods that use XPTI // framework to emit these traces that will be used by tools. @@ -120,16 +129,6 @@ void emitFunctionEndTrace(uint64_t CorrelationID, const char *FName) { #endif // XPTI_ENABLE_INSTRUMENTATION } -void contextSetExtendedDeleter(const cl::sycl::context &context, - pi_context_extended_deleter func, - void *user_data) { - auto impl = getSyclObjImpl(context); - auto contextHandle = reinterpret_cast(impl->getHandleRef()); - auto plugin = impl->getPlugin(); - plugin.call_nocheck( - contextHandle, func, user_data); -} - std::string platformInfoToString(pi_platform_info info) { switch (info) { case PI_PLATFORM_INFO_PROFILE: @@ -144,7 +143,7 @@ std::string platformInfoToString(pi_platform_info info) { return "PI_PLATFORM_INFO_EXTENSIONS"; default: die("Unknown pi_platform_info value passed to " - "cl::sycl::detail::pi::platformInfoToString"); + "pi::platformInfoToString"); } } @@ -206,23 +205,18 @@ std::string memFlagsToString(pi_mem_flags Flags) { return Sstream.str(); } -// GlobalPlugin is a global Plugin used with Interoperability constructors that -// use OpenCL objects to construct SYCL class objects. -std::shared_ptr GlobalPlugin; - // Find the plugin at the appropriate location and return the location. -bool findPlugins(vector_class> &PluginNames) { +bool findPlugins(std::vector> &PluginNames) { // TODO: Based on final design discussions, change the location where the // plugin must be searched; how to identify the plugins etc. Currently the // search is done for libpi_opencl.so/pi_opencl.dll file in LD_LIBRARY_PATH // env only. // - device_filter_list *FilterList = SYCLConfig::get(); + device_filter_list *FilterList = config::device_filter_list(); if (!FilterList) { - PluginNames.emplace_back(__SYCL_OPENCL_PLUGIN_NAME, backend::opencl); - PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, - backend::level_zero); - PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); + PluginNames.emplace_back(PI_OPENCL_PLUGIN_NAME, backend::opencl); + PluginNames.emplace_back(PI_LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); + PluginNames.emplace_back(PI_CUDA_PLUGIN_NAME, backend::cuda); } else { std::vector Filters = FilterList->get(); bool OpenCLFound = false; @@ -232,16 +226,16 @@ bool findPlugins(vector_class> &PluginNames) { backend Backend = Filter.Backend; if (!OpenCLFound && (Backend == backend::opencl || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_OPENCL_PLUGIN_NAME, backend::opencl); + PluginNames.emplace_back(PI_OPENCL_PLUGIN_NAME, backend::opencl); OpenCLFound = true; } else if (!LevelZeroFound && (Backend == backend::level_zero || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, + PluginNames.emplace_back(PI_LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); LevelZeroFound = true; } else if (!CudaFound && (Backend == backend::cuda || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); + PluginNames.emplace_back(PI_CUDA_PLUGIN_NAME, backend::cuda); CudaFound = true; } } @@ -280,23 +274,22 @@ bool bindPlugin(void *Library, PiPlugin *PluginInformation) { } bool trace(TraceLevel Level) { - auto TraceLevelMask = SYCLConfig::get(); + auto TraceLevelMask = config::trace_level_mask(); return (TraceLevelMask & Level) == Level; } // Initializes all available Plugins. -const vector_class &initialize() { - static std::once_flag PluginsInitDone; - - std::call_once(PluginsInitDone, []() { - initializePlugins(&GlobalHandler::instance().getPlugins()); - }); - - return GlobalHandler::instance().getPlugins(); +const std::vector &initialize() { + static std::vector *Plugins = []() { + auto PluginsPtr = new std::vector; + initializePlugins(PluginsPtr); + return PluginsPtr; + }(); + return *Plugins; } -static void initializePlugins(vector_class *Plugins) { - vector_class> PluginNames; +static void initializePlugins(std::vector *Plugins) { + std::vector> PluginNames; findPlugins(PluginNames); if (PluginNames.empty() && trace(PI_TRACE_ALL)) @@ -327,7 +320,7 @@ static void initializePlugins(vector_class *Plugins) { } continue; } - backend *BE = SYCLConfig::get(); + backend *BE = config::backend(); // Use OpenCL as the default interoperability plugin. // This will go away when we make backend interoperability selection // explicit in SYCL-2020. @@ -406,15 +399,15 @@ template const plugin &getPlugin() { if (Plugin) return *Plugin; - const vector_class &Plugins = pi::initialize(); + const std::vector &Plugins = pi::initialize(); for (const auto &P : Plugins) if (P.getBackend() == BE) { Plugin = &P; return *Plugin; } - throw runtime_error("pi::getPlugin couldn't find plugin", - PI_INVALID_OPERATION); + throw std::runtime_error("pi::getPlugin couldn't find plugin (" + + std::to_string(PI_INVALID_OPERATION) + ")"); } template const plugin &getPlugin(); @@ -524,7 +517,7 @@ pi_uint32 DeviceBinaryProperty::asUint32() const { assert(Prop->Type == PI_PROPERTY_TYPE_UINT32 && "property type mismatch"); // if type fits into the ValSize - it is used to store the property value assert(Prop->ValAddr == nullptr && "primitive types must be stored inline"); - return sycl::detail::pi::asUint32(&Prop->ValSize); + return pi::asUint32(&Prop->ValSize); } ByteArray DeviceBinaryProperty::asByteArray() const { @@ -558,16 +551,16 @@ void DeviceBinaryImage::PropertyRange::init(pi_device_binary Bin, End = Begin ? PS->PropertiesEnd : nullptr; } -RT::PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, +pi::PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData, size_t ImgSize) { struct { - RT::PiDeviceBinaryType Fmt; + pi::PiDeviceBinaryType Fmt; const uint32_t Magic; } Fmts[] = {{PI_DEVICE_BINARY_TYPE_SPIRV, 0x07230203}, {PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE, 0xDEC04342}}; if (ImgSize >= sizeof(Fmts[0].Magic)) { - detail::remove_const_t Hdr = 0; + typename std::remove_const::type Hdr = 0; std::copy(ImgData, ImgData + sizeof(Hdr), reinterpret_cast(&Hdr)); for (const auto &Fmt : Fmts) { @@ -597,6 +590,3 @@ void DeviceBinaryImage::init(pi_device_binary Bin) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/piapi/src/plugin_pi_hooks.cpp b/sycl/piapi/src/plugin_pi_hooks.cpp new file mode 100644 index 0000000000000..d07a138cbf262 --- /dev/null +++ b/sycl/piapi/src/plugin_pi_hooks.cpp @@ -0,0 +1,24 @@ +//==---------- plugin_pi_hooks.cpp - PI library hooks ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +// We just need basic definitions for the plugins to link properly +// These symbols aren't actually used in plugins, +// any library linking against piapi directly +// still needs to define its own hooks + +namespace pi { +namespace config { + +TraceLevel trace_level_mask() { return {TraceLevel::PI_TRACE_ALL}; } +pi::backend *backend() { return nullptr; } +pi::device_filter_list *device_filter_list() { return nullptr; } + +} // namespace config +} // namespace pi diff --git a/sycl/source/detail/posix_pi.cpp b/sycl/piapi/src/posix_pi.cpp similarity index 81% rename from sycl/source/detail/posix_pi.cpp rename to sycl/piapi/src/posix_pi.cpp index db21e2fa01ced..dc1516e67e28a 100644 --- a/sycl/source/detail/posix_pi.cpp +++ b/sycl/piapi/src/posix_pi.cpp @@ -6,14 +6,9 @@ // //===----------------------------------------------------------------------===// -#include - #include #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { namespace pi { void *loadOsLibrary(const std::string &PluginPath) { @@ -27,6 +22,3 @@ void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/piapi/src/shim.cpp b/sycl/piapi/src/shim.cpp new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sycl/source/detail/windows_pi.cpp b/sycl/piapi/src/windows_pi.cpp similarity index 79% rename from sycl/source/detail/windows_pi.cpp rename to sycl/piapi/src/windows_pi.cpp index 3e7d87cc86efd..dc48247e19872 100644 --- a/sycl/source/detail/windows_pi.cpp +++ b/sycl/piapi/src/windows_pi.cpp @@ -6,15 +6,10 @@ // //===----------------------------------------------------------------------===// -#include - +#include #include #include -#include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { namespace pi { void *loadOsLibrary(const std::string &PluginPath) { @@ -26,6 +21,3 @@ void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { } } // namespace pi -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/plugins/CMakeLists.txt b/sycl/plugins/CMakeLists.txt deleted file mode 100644 index 700e09d1a0c1d..0000000000000 --- a/sycl/plugins/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) - -if(SYCL_BUILD_PI_CUDA) - add_subdirectory(cuda) -endif() - -add_subdirectory(opencl) -add_subdirectory(level_zero) diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt deleted file mode 100644 index 1c230b795a10e..0000000000000 --- a/sycl/plugins/cuda/CMakeLists.txt +++ /dev/null @@ -1,51 +0,0 @@ -message(STATUS "Including the PI API CUDA backend.") - - # cannot rely on cmake support for CUDA; it assumes runtime API is being used. - # we only require the CUDA driver API to be used - # CUDA_CUDA_LIBRARY variable defines the path to libcuda.so, the CUDA Driver API library. - -find_package(CUDA 10.1 REQUIRED) - -# Make imported library global to use it within the project. -add_library(cudadrv SHARED IMPORTED GLOBAL) - -set_target_properties( - cudadrv PROPERTIES - IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} -) - -add_library(pi_cuda SHARED - "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "${sycl_inc_dir}/CL/sycl/detail/pi.hpp" - "pi_cuda.hpp" - "pi_cuda.cpp" -) - -add_dependencies(pi_cuda - ocl-headers -) - -add_dependencies(sycl-toolchain pi_cuda) - -set_target_properties(pi_cuda PROPERTIES LINKER_LANGUAGE CXX) - -target_include_directories(pi_cuda - PRIVATE - ${sycl_inc_dir} - PUBLIC - ${CUDA_INCLUDE_DIRS} -) - -target_link_libraries(pi_cuda PUBLIC OpenCL-Headers cudadrv) - -add_common_options(pi_cuda) - -install(TARGETS pi_cuda - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_cuda - RUNTIME DESTINATION "bin" COMPONENT pi_cuda -) - -# `sycl/source/CMakeLists.txt` adapted when SYCL_BUILD_PI_CUDA is defined: -# target_link_libraries(sycl PUBLIC pi_cuda) -# target_compile_definitions(sycl PUBLIC USE_PI_CUDA) diff --git a/sycl/plugins/ld-version-script.txt b/sycl/plugins/ld-version-script.txt deleted file mode 100644 index 1ad2c6d5f8390..0000000000000 --- a/sycl/plugins/ld-version-script.txt +++ /dev/null @@ -1,10 +0,0 @@ -{ - /* in CMakelists.txt, we pass -fvisibility=hidden compiler flag */ - /* This file is used to give exception of the hidden visibility */ - /* Export only pi* function symbols which are individually marked 'default' visibility */ - - global: pi*; - - /* all other symbols are local scope, meaning not exported */ - local: *; -}; diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt deleted file mode 100755 index b9127deb803f4..0000000000000 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ /dev/null @@ -1,116 +0,0 @@ -# PI Level Zero plugin library - -if(MSVC) - set(LEVEL_ZERO_LOADER - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}") -else() - set(LEVEL_ZERO_LOADER - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}") -endif() - -if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) - message(STATUS "Download Level Zero loader and headers from github.com") - if (CMAKE_C_COMPILER) - list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) - endif() - if (CMAKE_CXX_COMPILER) - list(APPEND AUX_CMAKE_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) - endif() - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build) - set(LEVEL_ZERO_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero/level_zero_loader") - if (NOT DEFINED SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE) - set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE}) - endif() - ExternalProject_Add(level-zero-loader - GIT_REPOSITORY https://github.com/oneapi-src/level-zero.git - GIT_TAG v1.0 - UPDATE_DISCONNECTED ${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE} - SOURCE_DIR ${LEVEL_ZERO_LOADER_SOURCE_DIR} - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build" - INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install" - CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} - -DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS} - -DCMAKE_INSTALL_PREFIX= - -DCMAKE_INSTALL_LIBDIR:PATH=lib${LLVM_LIBDIR_SUFFIX} - ${AUX_CMAKE_FLAGS} - LOG_DOWNLOAD 1 - LOG_UPDATE 1 - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 - STEP_TARGETS configure,build,install - DEPENDS ocl-headers - BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER} - ) - ExternalProject_Add_Step(level-zero-loader llvminstall - COMMAND ${CMAKE_COMMAND} -E copy_directory / ${LLVM_BINARY_DIR} - COMMENT "Installing level-zero-loader into the LLVM binary directory" - DEPENDEES install - ) - - install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install/" - DESTINATION "." - COMPONENT level-zero-loader - ) - - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS level-zero-loader) -else() - include_directories("${LEVEL_ZERO_INCLUDE_DIR}") - file(GLOB LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}*") - file(COPY ${LEVEL_ZERO_LIBRARY_SRC} DESTINATION ${LLVM_LIBRARY_OUTPUT_INTDIR}) - add_custom_target(level-zero-loader DEPENDS ${LEVEL_ZERO_LIBRARY} COMMENT "Copying Level Zero Loader ...") -endif() - -add_library (LevelZeroLoader-Headers INTERFACE) -add_library (LevelZeroLoader::Headers ALIAS LevelZeroLoader-Headers) -target_include_directories(LevelZeroLoader-Headers - INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" -) - -include_directories("${sycl_inc_dir}") -include_directories(${OPENCL_INCLUDE}) - -add_library(pi_level_zero SHARED - "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/usm_allocator.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/usm_allocator.hpp" -) - -if (MSVC) - # by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport) - # which are individually tagged for all pi* symbols in pi.h - target_compile_definitions(pi_level_zero PRIVATE __SYCL_BUILD_SYCL_DLL) -else() - # we set the visibility of all symbols 'hidden' by default. - # In pi.h file, we set exported symbols with visibility==default individually - target_compile_options(pi_level_zero PUBLIC -fvisibility=hidden) - - # This script file is used to allow exporting pi* symbols only. - # All other symbols are regarded as local (hidden) - set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/../ld-version-script.txt") - - # Filter symbols based on the scope defined in the script file, - # and export pi* function symbols in the library. - target_link_libraries( pi_level_zero - PRIVATE "-Wl,--version-script=${linker_script}" - ) -endif() - -if (TARGET level-zero-loader) - add_dependencies(pi_level_zero level-zero-loader) -endif() - add_dependencies(sycl-toolchain pi_level_zero) - - target_link_libraries(pi_level_zero PRIVATE "${LEVEL_ZERO_LOADER}") -if (UNIX) - target_link_libraries(pi_level_zero PRIVATE pthread) -endif() - -add_common_options(pi_level_zero) - -install(TARGETS pi_level_zero - LIBRARY DESTINATION "lib" COMPONENT pi_level_zero - RUNTIME DESTINATION "bin" COMPONENT pi_level_zero) diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt deleted file mode 100644 index 30674c1d5621b..0000000000000 --- a/sycl/plugins/opencl/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -#TODO: -#1. Figure out why CMP0057 has to be set. Should have been taken care of earlier in the build -#2. Use AddLLVM to modify the build and access config options -#cmake_policy(SET CMP0057 NEW) -#include(AddLLVM) - -# Plugin for OpenCL -# Create Shared library for libpi_opencl.so. -#TODO: remove dependency on pi.hpp in sycl project. -#TODO: Currently, the pi.hpp header is common between sycl and plugin library sources. -#This can be changed by copying the pi.hpp file in the plugins project. - -add_library(pi_opencl SHARED - "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "pi_opencl.cpp" - ) - -add_dependencies(pi_opencl - ocl-icd - ocl-headers -) - -add_dependencies(sycl-toolchain pi_opencl) - -set_target_properties(pi_opencl PROPERTIES LINKER_LANGUAGE CXX) - -#preprocessor definitions for compiling a target's sources. We do not need it for pi_opencl -target_include_directories(pi_opencl PRIVATE "${sycl_inc_dir}") - -#link pi_opencl with OpenCL headers and ICD Loader. -target_link_libraries( pi_opencl - PRIVATE OpenCL::Headers - PRIVATE ${OpenCL_LIBRARIES} -) -if (MSVC) - # by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport) - # which are individually tagged for all pi* symbols in pi.h - target_compile_definitions(pi_opencl PRIVATE __SYCL_BUILD_SYCL_DLL) -else() - # we set the visibility of all symbols 'hidden' by default. - # In pi.h file, we set exported symbols with visibility==default individually - target_compile_options(pi_opencl PUBLIC -fvisibility=hidden) - - # This script file is used to allow exporting pi* symbols only. - # All other symbols are regarded as local (hidden) - set(linker_script "${CMAKE_CURRENT_SOURCE_DIR}/../ld-version-script.txt") - - # Filter symbols based on the scope defined in the script file, - # and export pi* function symbols in the library. - target_link_libraries( pi_opencl - PRIVATE "-Wl,--version-script=${linker_script}" - ) -endif() - -add_common_options(pi_opencl) - -install(TARGETS pi_opencl - LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT pi_opencl - RUNTIME DESTINATION "bin" COMPONENT pi_opencl) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 890743b7c6c58..1f2039d354503 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -76,8 +76,11 @@ function(add_sycl_rt_library LIB_NAME) ${OpenCL_LIBRARIES} ${CMAKE_DL_LIBS} ${CMAKE_THREAD_LIBS_INIT} - PUBLIC - $<$:pi_cuda> + piapi::piapi + ) + target_link_libraries(${LIB_OBJ_NAME} + PRIVATE + piapi::piapi ) target_compile_definitions(${LIB_OBJ_NAME} @@ -103,7 +106,6 @@ set(SYCL_SOURCES "detail/builtins_integer.cpp" "detail/builtins_math.cpp" "detail/builtins_relational.cpp" - "detail/pi.cpp" "detail/common.cpp" "detail/config.cpp" "detail/context_impl.cpp" @@ -122,6 +124,7 @@ set(SYCL_SOURCES "detail/kernel_impl.cpp" "detail/kernel_program_cache.cpp" "detail/memory_manager.cpp" + "detail/pi_hooks.cpp" "detail/platform_impl.cpp" "detail/program_impl.cpp" "detail/program_manager/program_manager.cpp" @@ -159,8 +162,6 @@ set(SYCL_SOURCES "sampler.cpp" "stream.cpp" "spirv_ops.cpp" - "$<$:detail/windows_pi.cpp>" - "$<$,$>:detail/posix_pi.cpp>" ) if (MSVC) diff --git a/sycl/source/backend/level_zero.cpp b/sycl/source/backend/level_zero.cpp index 4f7467058bcc6..308f6387d9477 100644 --- a/sycl/source/backend/level_zero.cpp +++ b/sycl/source/backend/level_zero.cpp @@ -8,9 +8,9 @@ #include #include -#include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/backend/opencl.cpp b/sycl/source/backend/opencl.cpp index bd06563c0b8b8..aa0a070237689 100644 --- a/sycl/source/backend/opencl.cpp +++ b/sycl/source/backend/opencl.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/source/context.cpp b/sycl/source/context.cpp index cf180ede28426..ddae9e6604feb 100644 --- a/sycl/source/context.cpp +++ b/sycl/source/context.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -66,9 +67,9 @@ context::context(const vector_class &DeviceList, const auto &NonHostPlatform = NonHostDevice.get_platform().get(); if (std::any_of(DeviceList.begin(), DeviceList.end(), [&](const device &CurrentDevice) { - return (CurrentDevice.is_host() || - (CurrentDevice.get_platform().get() != - NonHostPlatform)); + return (CurrentDevice.is_host() || + (CurrentDevice.get_platform().get() != + NonHostPlatform)); })) throw invalid_parameter_error( "Can't add devices across platforms to a single context.", diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 8f54271e260f6..7246f80e2a0e2 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 0c06ba43bb758..99599c9a74cf4 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -6,9 +6,9 @@ // // ===--------------------------------------------------------------------=== // +#include #include -#include -#include +#include #include #include #include @@ -20,6 +20,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 967c4a6cffcbe..7fcebad5e855f 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -9,7 +9,7 @@ #pragma once #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index c1ea2a347c061..6244c74eae5fa 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/source/detail/device_filter.cpp b/sycl/source/detail/device_filter.cpp index e7a1a0b99d8f8..04cd936abab58 100644 --- a/sycl/source/detail/device_filter.cpp +++ b/sycl/source/detail/device_filter.cpp @@ -13,9 +13,9 @@ #include -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace detail { +namespace pi { + +using namespace cl::sycl; device_filter::device_filter(const std::string &FilterString) { const std::array, 5> @@ -124,6 +124,4 @@ void device_filter_list::addFilter(device_filter &Filter) { FilterList.push_back(Filter); } -} // namespace detail -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) +} // namespace pi diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 0e1381f933964..979ce1510e771 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 4fc8be379bb0e..213f5965b6205 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -10,14 +10,14 @@ #include #include #include -#include +#include #include #include #include #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index ab846466161ca..a585c823b1aba 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -13,8 +13,8 @@ #include "error_handling.hpp" #include -#include -#include +#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/error_handling/error_handling.hpp b/sycl/source/detail/error_handling/error_handling.hpp index 06bfe4cec173c..00bb3e132bc48 100644 --- a/sycl/source/detail/error_handling/error_handling.hpp +++ b/sycl/source/detail/error_handling/error_handling.hpp @@ -9,8 +9,8 @@ #pragma once #include -#include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 0f67b3238f35c..ef85924af5575 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -21,13 +21,18 @@ #include "xpti_trace_framework.hpp" #include #include + +namespace pi { +extern xpti::trace_event_data_t *GSYCLGraphEvent; +} #endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { + #ifdef XPTI_ENABLE_INSTRUMENTATION -extern xpti::trace_event_data_t *GSYCLGraphEvent; +using pi::GSYCLGraphEvent; #endif // Threat all devices that don't support interoperability as host devices to diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 4b291159c8e4f..e8de16be21173 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -10,18 +10,21 @@ #include #include -#include +#include #include #include #include #include +namespace pi { +class plugin; +} + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { class context; namespace detail { -class plugin; class context_impl; using ContextImplPtr = std::shared_ptr; class queue_impl; @@ -120,7 +123,7 @@ class event_impl { /// \return the Plugin associated with the context of this event. /// Should be called when this is not a Host Event. - const plugin &getPlugin() const; + const pi::plugin &getPlugin() const; /// Associate event with the context. /// diff --git a/sycl/source/detail/event_info.hpp b/sycl/source/detail/event_info.hpp index 62d4ed01c2947..03cf4e3d71792 100644 --- a/sycl/source/detail/event_info.hpp +++ b/sycl/source/detail/event_info.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index be4b9e200fda8..4f85312ff3bb3 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -10,9 +10,9 @@ #include #include #include -#include #include #include +#include #ifdef WIN32 #include @@ -91,16 +91,6 @@ std::mutex &GlobalHandler::getFilterMutex() { return *MFilterMutex; } -std::vector &GlobalHandler::getPlugins() { - if (MPlugins) - return *MPlugins; - - const std::lock_guard Lock{MFieldsLock}; - if (!MPlugins) - MPlugins = std::make_unique>(); - - return *MPlugins; -} device_filter_list & GlobalHandler::getDeviceFilterList(const std::string &InitValue) { if (MDeviceFilterList) diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index ae923593b5808..7dc272c3713f7 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -13,6 +13,10 @@ #include +namespace pi { +class plugin; +} + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -20,7 +24,6 @@ class platform_impl; class Scheduler; class ProgramManager; class Sync; -class plugin; class device_filter_list; using PlatformImplPtr = std::shared_ptr; @@ -54,7 +57,6 @@ class GlobalHandler { std::vector &getPlatformCache(); std::mutex &getPlatformMapMutex(); std::mutex &getFilterMutex(); - std::vector &getPlugins(); device_filter_list &getDeviceFilterList(const std::string &InitValue); private: @@ -72,7 +74,7 @@ class GlobalHandler { std::unique_ptr> MPlatformCache; std::unique_ptr MPlatformMapMutex; std::unique_ptr MFilterMutex; - std::unique_ptr> MPlugins; + std::unique_ptr> MPlugins; std::unique_ptr MDeviceFilterList; }; } // namespace detail diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 860cc5e0e7961..24a2e38b6eebf 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -9,7 +9,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 178bd273f33bb..bc3582d2af590 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/kernel_program_cache.cpp b/sycl/source/detail/kernel_program_cache.cpp index 18b13f3fd589c..a6d80fe8a9e05 100644 --- a/sycl/source/detail/kernel_program_cache.cpp +++ b/sycl/source/detail/kernel_program_cache.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 478c1cc5cf3b8..59241ec4f5c3b 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/pi_hooks.cpp b/sycl/source/detail/pi_hooks.cpp new file mode 100644 index 0000000000000..51a630fd86d8b --- /dev/null +++ b/sycl/source/detail/pi_hooks.cpp @@ -0,0 +1,44 @@ +//==---------------- pi_hooks.cpp - SYCL standard source file --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifdef XPTI_ENABLE_INSTRUMENTATION +#include "xpti_trace_framework.hpp" +#endif // XPTI_ENABLE_INSTRUMENTATION + +#include +#include + +namespace pi { + +namespace config { + +TraceLevel trace_level_mask() { + using namespace cl::sycl::detail; + return static_cast(SYCLConfig::get()); +} + +pi::backend *backend() { + using namespace cl::sycl::detail; + return SYCLConfig::get(); +} + +pi::device_filter_list *device_filter_list() { + using namespace cl::sycl::detail; + return SYCLConfig::get(); +} + +} // namespace config + +#ifdef XPTI_ENABLE_INSTRUMENTATION + +xpti::trace_event_data_t *GSYCLGraphEvent = nullptr; + +const char *SYCL_STREAM_NAME = "sycl"; +#endif // XPTI_ENABLE_INSTRUMENTATION + +} // namespace pi diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index f86809d1e024e..4565b8585a226 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index a16f5de53456a..3aeb84baf8c57 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -8,11 +8,11 @@ #pragma once #include -#include +#include #include #include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -38,10 +38,10 @@ class platform_impl { /// \param APlatform is a raw plug-in platform handle. /// \param APlugin is a plug-in handle. explicit platform_impl(RT::PiPlatform APlatform, const plugin &APlugin) - : MPlatform(APlatform), MPlugin(std::make_shared(APlugin)) {} + : MPlatform(APlatform), MPlugin(std::make_shared(APlugin)) {} explicit platform_impl(RT::PiPlatform APlatform, - std::shared_ptr APlugin) + std::shared_ptr APlugin) : MPlatform(APlatform), MPlugin(APlugin) {} ~platform_impl() = default; @@ -128,7 +128,7 @@ class platform_impl { /// Sets the platform implementation to use another plugin. /// /// \param PluginPtr is a pointer to a plugin instance - void setPlugin(std::shared_ptr PluginPtr) { + void setPlugin(std::shared_ptr PluginPtr) { assert(!MHostPlatform && "Plugin is not available for Host"); MPlugin = std::move(PluginPtr); } @@ -194,7 +194,7 @@ class platform_impl { private: bool MHostPlatform = false; RT::PiPlatform MPlatform = 0; - std::shared_ptr MPlugin; + std::shared_ptr MPlugin; std::vector> MDeviceCache; std::mutex MDeviceMapMutex; }; diff --git a/sycl/source/detail/platform_info.hpp b/sycl/source/detail/platform_info.hpp index 1ba1c970e8fe4..951d8ac8b95f8 100644 --- a/sycl/source/detail/platform_info.hpp +++ b/sycl/source/detail/platform_info.hpp @@ -9,9 +9,9 @@ #pragma once #include #include -#include +#include #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/detail/plugin_sycl.hpp b/sycl/source/detail/plugin_sycl.hpp new file mode 100644 index 0000000000000..dbba7147efe43 --- /dev/null +++ b/sycl/source/detail/plugin_sycl.hpp @@ -0,0 +1,23 @@ +//==---------- plugin_sycl.hpp - SYCL wrapper for PI plugin ----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// ===--------------------------------------------------------------------=== // + +#pragma once + +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +using plugin = ::pi::plugin; + +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index aea275b25e494..f7864ed05bade 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -8,12 +8,12 @@ #include #include -#include #include #include #include #include #include +#include #include #include diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 2507d56cfeb9c..e6c1ef50f4e70 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index e4caf0787c69b..6478435c51adc 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 1441a9f3e864d..1b3f55bca0f2a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 21513a035b732..f97e4a1ba6c9f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -43,6 +43,11 @@ #ifdef XPTI_ENABLE_INSTRUMENTATION #include "xpti_trace_framework.hpp" + +namespace pi { +// Global graph for the application +extern xpti::trace_event_data_t *GSYCLGraphEvent; +} // namespace pi #endif __SYCL_INLINE_NAMESPACE(cl) { @@ -50,8 +55,7 @@ namespace sycl { namespace detail { #ifdef XPTI_ENABLE_INSTRUMENTATION -// Global graph for the application -extern xpti::trace_event_data_t *GSYCLGraphEvent; +using pi::GSYCLGraphEvent; #endif #ifdef __SYCL_ENABLE_GNU_DEMANGLING @@ -773,7 +777,6 @@ void AllocaCommand::printDot(std::ostream &Stream) const { Stream << " Link : " << this->MLinkedAllocaCmd << "\\n"; Stream << "\"];" << std::endl; - for (const auto &Dep : MDeps) { if (Dep.MDepCommand == nullptr) continue; @@ -911,7 +914,6 @@ cl_int ReleaseCommand::enqueueImp() { // 3. Device alloca in the pair should be in active state in order to be // correctly released. - // There is no actual memory allocation if a host alloca command is created // being linked to a device allocation. SkipRelease |= CurAllocaIsHost && !MAllocaCmd->MIsLeaderAlloca; @@ -1991,15 +1993,16 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(RawEvents.size(), &RawEvents[0]); } std::vector ReqMemObjs; - // Extract the Mem Objects for all Requirements, to ensure they are available if - // a user ask for them inside the interop task scope - const auto& HandlerReq = ExecInterop->MRequirements; - std::for_each(std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement* Req) { - AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); - interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); - ReqMemObjs.emplace_back(ReqToMem); - }); + // Extract the Mem Objects for all Requirements, to ensure they are + // available if a user ask for them inside the interop task scope + const auto &HandlerReq = ExecInterop->MRequirements; + std::for_each( + std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement *Req) { + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); + interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); + ReqMemObjs.emplace_back(ReqToMem); + }); std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); interop_handler InteropHandler(std::move(ReqMemObjs), MQueue); diff --git a/sycl/source/detail/spec_constant_impl.cpp b/sycl/source/detail/spec_constant_impl.cpp index f61db6fe4a0bc..000b58fa4165d 100644 --- a/sycl/source/detail/spec_constant_impl.cpp +++ b/sycl/source/detail/spec_constant_impl.cpp @@ -9,9 +9,9 @@ #include #include -#include #include #include +#include #include #include diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 82cddfddb8b00..3aef4122523d0 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include __SYCL_INLINE_NAMESPACE(cl) { diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index d6acee67293cf..5c78364adfbec 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -194,7 +194,7 @@ void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt) { void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q) { return aligned_alloc_host(Alignment, Size, Q.get_context()); -} +} void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev, const context &Ctxt) { diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 57ca9f6190615..b71a97e880730 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -13,6 +13,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index fa36438ae09b5..a80153d5c7dc3 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/sycl/source/interop_handler.cpp b/sycl/source/interop_handler.cpp index f6fb55ad28120..b37ed26de282e 100644 --- a/sycl/source/interop_handler.cpp +++ b/sycl/source/interop_handler.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #include #include diff --git a/sycl/source/platform.cpp b/sycl/source/platform.cpp index 3ab9f5b020d6d..28a568645746f 100644 --- a/sycl/source/platform.cpp +++ b/sycl/source/platform.cpp @@ -12,6 +12,7 @@ #include #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt index 83c02bd94481f..1ed6a84359ede 100644 --- a/sycl/tools/CMakeLists.txt +++ b/sycl/tools/CMakeLists.txt @@ -2,22 +2,13 @@ add_subdirectory(sycl-ls) # TODO: move each tool in its own sub-directory add_executable(get_device_count_by_type get_device_count_by_type.cpp) -add_dependencies(get_device_count_by_type ocl-headers ocl-icd level-zero-loader) - -if(MSVC) - set(LEVEL_ZERO_LIBRARY - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}") -else() - set(LEVEL_ZERO_LIBRARY - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}") -endif() +add_dependencies(get_device_count_by_type ocl-headers ocl-icd) target_link_libraries(get_device_count_by_type PRIVATE OpenCL::Headers - LevelZeroLoader::Headers ${OpenCL_LIBRARIES} - ${LEVEL_ZERO_LIBRARY} + level_zero_lib $<$:cudadrv> ) target_compile_definitions(get_device_count_by_type diff --git a/sycl/tools/sycl-ls/CMakeLists.txt b/sycl/tools/sycl-ls/CMakeLists.txt index 5833d617d9c4e..16dd5115e2be8 100644 --- a/sycl/tools/sycl-ls/CMakeLists.txt +++ b/sycl/tools/sycl-ls/CMakeLists.txt @@ -5,6 +5,7 @@ target_link_libraries(sycl-ls PRIVATE sycl OpenCL::Headers + piapi::piapi ) install(TARGETS sycl-ls RUNTIME DESTINATION "bin" COMPONENT sycl-ls) diff --git a/sycl/unittests/helpers/PiMock.hpp b/sycl/unittests/helpers/PiMock.hpp index aad34cba434b5..495e94edb0b97 100644 --- a/sycl/unittests/helpers/PiMock.hpp +++ b/sycl/unittests/helpers/PiMock.hpp @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include @@ -53,7 +53,7 @@ namespace RT = detail::pi; decltype(&::api) FuncPtr) { \ MPlugin->PiFunctionTable.api = FuncPtr; \ } -#include +#include #undef _PI_API /// The PiMock class wraps an instance of a SYCL platform class, diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index d7c6ed6fbf4b2..3246aa759c2d0 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "CL/sycl/detail/pi.h" +#include "CL/sycl/detail/pi_sycl.hpp" #include "detail/context_impl.hpp" #include "detail/kernel_program_cache.hpp" #include "detail/program_impl.hpp" diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp index 7f051f5ab6790..588d91c6c7a99 100644 --- a/sycl/unittests/pi/BackendString.hpp +++ b/sycl/unittests/pi/BackendString.hpp @@ -4,13 +4,13 @@ #pragma once -#include +#include namespace pi { -inline const char *GetBackendString(cl::sycl::backend backend) { +inline const char *GetBackendString(pi::backend backend) { switch (backend) { #define PI_BACKEND_STR(backend_name) \ - case cl::sycl::backend::backend_name: \ + case pi::backend::backend_name: \ return #backend_name PI_BACKEND_STR(cuda); PI_BACKEND_STR(host); diff --git a/sycl/unittests/pi/EnqueueMemTest.cpp b/sycl/unittests/pi/EnqueueMemTest.cpp index 8abf016e5b322..7e42da1674f3c 100644 --- a/sycl/unittests/pi/EnqueueMemTest.cpp +++ b/sycl/unittests/pi/EnqueueMemTest.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "TestGetPlugin.hpp" -#include -#include +#include +#include #include using namespace cl::sycl; diff --git a/sycl/unittests/pi/PlatformTest.cpp b/sycl/unittests/pi/PlatformTest.cpp index c3d04721cc992..3e6383f3e0cf9 100644 --- a/sycl/unittests/pi/PlatformTest.cpp +++ b/sycl/unittests/pi/PlatformTest.cpp @@ -8,8 +8,8 @@ #include "TestGetPlugin.hpp" #include -#include -#include +#include +#include #include #include diff --git a/sycl/unittests/pi/TestGetPlugin.hpp b/sycl/unittests/pi/TestGetPlugin.hpp index 6512b111f8123..950559b300920 100644 --- a/sycl/unittests/pi/TestGetPlugin.hpp +++ b/sycl/unittests/pi/TestGetPlugin.hpp @@ -6,11 +6,11 @@ #include "BackendString.hpp" #include -#include +#include #include namespace pi { -inline cl::sycl::detail::plugin initializeAndGet(cl::sycl::backend backend) { +inline cl::sycl::detail::plugin initializeAndGet(pi::backend backend) { auto plugins = cl::sycl::detail::pi::initialize(); auto it = std::find_if(plugins.begin(), plugins.end(), [=](cl::sycl::detail::plugin p) -> bool { diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index 1ae64f981931c..aaac1e4410dd1 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -12,10 +12,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include -#include +#include +#include +#include +#include #include const unsigned int LATEST_KNOWN_CUDA_DRIVER_API_VERSION = 3020u; diff --git a/sycl/unittests/pi/cuda/test_commands.cpp b/sycl/unittests/pi/cuda/test_commands.cpp index 3235391f93425..9092abb945b19 100644 --- a/sycl/unittests/pi/cuda/test_commands.cpp +++ b/sycl/unittests/pi/cuda/test_commands.cpp @@ -12,9 +12,9 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_device.cpp b/sycl/unittests/pi/cuda/test_device.cpp index 7c8a5d756c3c7..a1f1ef13d1a88 100644 --- a/sycl/unittests/pi/cuda/test_device.cpp +++ b/sycl/unittests/pi/cuda/test_device.cpp @@ -12,9 +12,9 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_interop_get_native.cpp b/sycl/unittests/pi/cuda/test_interop_get_native.cpp index 584f73ad45c00..4bc2a16e67e5a 100644 --- a/sycl/unittests/pi/cuda/test_interop_get_native.cpp +++ b/sycl/unittests/pi/cuda/test_interop_get_native.cpp @@ -13,6 +13,7 @@ #include #include #include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index 0b57e7eb82790..3943fdf77d28e 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -12,9 +12,9 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include +#include +#include +#include // PI CUDA kernels carry an additional argument for the implicit global offset. #define NUM_IMPLICIT_ARGS 1 diff --git a/sycl/unittests/pi/cuda/test_mem_obj.cpp b/sycl/unittests/pi/cuda/test_mem_obj.cpp index 2441cdd14f3b0..ae45ef3e92a1c 100644 --- a/sycl/unittests/pi/cuda/test_mem_obj.cpp +++ b/sycl/unittests/pi/cuda/test_mem_obj.cpp @@ -12,10 +12,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include -#include +#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp index da0036a8ebc5b..a8d383a65c6bc 100644 --- a/sycl/unittests/pi/cuda/test_primary_context.cpp +++ b/sycl/unittests/pi/cuda/test_primary_context.cpp @@ -13,7 +13,7 @@ #include "TestGetPlatforms.hpp" #include #include -#include +#include #include diff --git a/sycl/unittests/pi/cuda/test_queue.cpp b/sycl/unittests/pi/cuda/test_queue.cpp index 39ee2731df03a..0a7c49a0321b1 100644 --- a/sycl/unittests/pi/cuda/test_queue.cpp +++ b/sycl/unittests/pi/cuda/test_queue.cpp @@ -12,10 +12,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include -#include -#include +#include +#include +#include +#include using namespace cl::sycl; diff --git a/sycl/unittests/pi/cuda/test_sampler_properties.cpp b/sycl/unittests/pi/cuda/test_sampler_properties.cpp index c4a19d8dc9e93..b3fa008d12976 100644 --- a/sycl/unittests/pi/cuda/test_sampler_properties.cpp +++ b/sycl/unittests/pi/cuda/test_sampler_properties.cpp @@ -8,9 +8,10 @@ #include "TestGetPlugin.hpp" #include -#include -#include + +#include #include +#include #include namespace {