From a98551752c79b5ae53f011f31579aa774b776ffd Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Mon, 1 Mar 2021 19:28:33 -0800 Subject: [PATCH 01/11] ITT stubs and compiler wrappers for SPIR-V devices. Signed-off-by: Vyacheslav Zakharin --- libdevice/cmake/modules/SYCLLibdevice.cmake | 25 +++++++++ libdevice/device_itt.h | 60 +++++++++++++++++++++ libdevice/itt_cmplr_wrappers.cpp | 59 ++++++++++++++++++++ libdevice/itt_stubs.cpp | 27 ++++++++++ 4 files changed, 171 insertions(+) create mode 100644 libdevice/device_itt.h create mode 100644 libdevice/itt_cmplr_wrappers.cpp create mode 100644 libdevice/itt_stubs.cpp diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index eb94db50095d8..6a65efc2ec6e8 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -167,12 +167,36 @@ add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-cmath-fp64.${lib-su DEPENDS device_math.h device.h clang clang-offload-bundler VERBATIM) +add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} + COMMAND ${clang} -fsycl -c + ${compile_opts} ${sycl_targets_opt} + ${CMAKE_CURRENT_SOURCE_DIR}/itt_stubs.cpp + -o ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} + MAIN_DEPENDENCY itt_stubs.cpp + DEPENDS device_itt.h device.h clang clang-offload-bundler + VERBATIM) + +add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-wrappers.${lib-suffix} + COMMAND ${clang} -fsycl -c + ${compile_opts} ${sycl_targets_opt} + ${CMAKE_CURRENT_SOURCE_DIR}/itt_cmplr_wrappers.cpp + -o ${obj_binary_dir}/libsycl-itt-wrappers.${lib-suffix} + MAIN_DEPENDENCY itt_cmplr_wrappers.cpp + DEPENDS device_itt.h device.h clang clang-offload-bundler + VERBATIM) + +set(devicelib-obj-itt-files + ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} + ${obj_binary_dir}/libsycl-itt-wrappers.${lib-suffix} + ) + add_custom_target(libsycldevice-obj DEPENDS ${devicelib-obj-file} ${devicelib-obj-complex} 
${devicelib-obj-complex-fp64} ${devicelib-obj-cmath} ${devicelib-obj-cmath-fp64} + ${devicelib-obj-itt-files} ) add_custom_target(libsycldevice-spv DEPENDS ${spv_binary_dir}/libsycl-fallback-cassert.spv @@ -213,6 +237,7 @@ install(FILES ${devicelib-obj-file} ${obj_binary_dir}/libsycl-fallback-cmath.${lib-suffix} ${devicelib-obj-cmath-fp64} ${obj_binary_dir}/libsycl-fallback-cmath-fp64.${lib-suffix} + ${devicelib-obj-itt-files} DESTINATION ${install_dest_lib} COMPONENT libsycldevice) diff --git a/libdevice/device_itt.h b/libdevice/device_itt.h new file mode 100644 index 0000000000000..5b79236735caa --- /dev/null +++ b/libdevice/device_itt.h @@ -0,0 +1,60 @@ +//==------- device_itt.h - ITT devicelib functions declarations ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==------------------------------------------------------------------------==// + +#ifndef __LIBDEVICE_DEVICE_ITT_H__ +#define __LIBDEVICE_DEVICE_ITT_H__ + +#include "device.h" + +#ifdef __SPIR__ +#include +#include + +// Use SPIRV constants directly in place of OCL intrinsic functions. +#define __SPIRV_VAR_QUALIFIERS EXTERN_C const +typedef size_t size_t_vec __attribute__((ext_vector_type(3))); +__SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; + +#define ITT_STUB_ATTRIBUTES __attribute__((noinline,optnone)) + +// FIXME: must be enabled via -fdeclare-spirv-builtins +DEVICE_EXTERN_C char __spirv_SpecConstant(int, char); + +#define ITT_SPEC_CONSTANT 0xFF747469 + +static inline bool isITTEnabled() { + return __spirv_SpecConstant(ITT_SPEC_CONSTANT, 0) != 0; +} + +// Wrapper APIs that may be called by compiler-generated code. 
+DEVICE_EXTERN_C +void __itt_spirv_wi_start_wrapper(); +DEVICE_EXTERN_C +void __itt_spirv_wi_finish_wrapper(); +DEVICE_EXTERN_C +void __itt_spirv_wg_barrier_wrapper(); +DEVICE_EXTERN_C +void __itt_spirv_wi_resume_wrapper(); + +// Non-inlinable and non-optimizable APIs that are recognized +// by profiling tools. +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wi_start_stub( + size_t *group_id, size_t wi_id, uint32_t wg_size); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wi_finish_stub( + size_t *group_id, size_t wi_id); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wg_barrier_stub(uintptr_t barrier_id); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wi_resume_stub(size_t* group_id, size_t wi_id); + +#endif // __SPIR__ +#endif // __LIBDEVICE_DEVICE_ITT_H__ diff --git a/libdevice/itt_cmplr_wrappers.cpp b/libdevice/itt_cmplr_wrappers.cpp new file mode 100644 index 0000000000000..8671ef02f7572 --- /dev/null +++ b/libdevice/itt_cmplr_wrappers.cpp @@ -0,0 +1,59 @@ +//==--- itt_cmplr_wrappers.cpp - compiler wtappers for ITT -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "device_itt.h" + +#ifdef __SPIR__ + +DEVICE_EXTERN_C +void __itt_spirv_wi_start_wrapper() { + if (!isITTEnabled()) + return; + + size_t GroupID[3] = {__spirv_BuiltInWorkgroupId.x, + __spirv_BuiltInWorkgroupId.y, + __spirv_BuiltInWorkgroupId.z}; + size_t WIID = __spirv_BuiltInGlobalLinearId; + uint32_t WGSize = static_cast(__spirv_BuiltInWorkgroupSize.x * + __spirv_BuiltInWorkgroupSize.y * __spirv_BuiltInWorkgroupSize.z); + __itt_spirv_wi_start_stub(GroupID, WIID, WGSize); +} + +DEVICE_EXTERN_C +void __itt_spirv_wi_finish_wrapper() { + if (!isITTEnabled()) + return; + + size_t GroupID[3] = {__spirv_BuiltInWorkgroupId.x, + __spirv_BuiltInWorkgroupId.y, + __spirv_BuiltInWorkgroupId.z}; + size_t WIID = __spirv_BuiltInGlobalLinearId; + __itt_spirv_wi_finish_stub(GroupID, WIID); +} + +DEVICE_EXTERN_C +void __itt_spirv_wg_barrier_wrapper() { + if (!isITTEnabled()) + return; + + __itt_spirv_wg_barrier_stub(0); +} + +DEVICE_EXTERN_C +void __itt_spirv_wi_resume_wrapper() { + if (!isITTEnabled()) + return; + + size_t GroupID[3] = {__spirv_BuiltInWorkgroupId.x, + __spirv_BuiltInWorkgroupId.y, + __spirv_BuiltInWorkgroupId.z}; + size_t WIID = __spirv_BuiltInGlobalLinearId; + __itt_spirv_wi_resume_stub(GroupID, WIID); +} + +#endif // __SPIR__ diff --git a/libdevice/itt_stubs.cpp b/libdevice/itt_stubs.cpp new file mode 100644 index 0000000000000..d4a73232cab20 --- /dev/null +++ b/libdevice/itt_stubs.cpp @@ -0,0 +1,27 @@ +//==--- itt_stubs.cpp - stub functions for ITT ----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "device_itt.h" + +#ifdef __SPIR__ + +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wi_start_stub( + size_t *group_id, size_t wi_id, uint32_t wg_size) {} + +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wi_finish_stub( + size_t *group_id, size_t wi_id) {} + +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wg_barrier_stub(uintptr_t barrier_id) {} + +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES +void __itt_spirv_wi_resume_stub(size_t* group_id, size_t wi_id) {} + +#endif // __SPIR__ From e2e60cb29f6530223d6a56f68118185ca64db41e Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Mon, 1 Mar 2021 19:40:01 -0800 Subject: [PATCH 02/11] clang-format Signed-off-by: Vyacheslav Zakharin --- libdevice/device_itt.h | 20 +++++++++----------- libdevice/itt_cmplr_wrappers.cpp | 3 ++- libdevice/itt_stubs.cpp | 18 ++++++++---------- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/libdevice/device_itt.h b/libdevice/device_itt.h index 5b79236735caa..5805e6db5fa94 100644 --- a/libdevice/device_itt.h +++ b/libdevice/device_itt.h @@ -22,7 +22,7 @@ __SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; -#define ITT_STUB_ATTRIBUTES __attribute__((noinline,optnone)) +#define ITT_STUB_ATTRIBUTES __attribute__((noinline, optnone)) // FIXME: must be enabled via -fdeclare-spirv-builtins DEVICE_EXTERN_C char __spirv_SpecConstant(int, char); @@ -45,16 +45,14 @@ void __itt_spirv_wi_resume_wrapper(); // Non-inlinable and non-optimizable APIs that are recognized // by profiling tools. 
-DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void __itt_spirv_wi_start_stub( - size_t *group_id, size_t wi_id, uint32_t wg_size); -DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void __itt_spirv_wi_finish_stub( - size_t *group_id, size_t wi_id); -DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void __itt_spirv_wg_barrier_stub(uintptr_t barrier_id); -DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void __itt_spirv_wi_resume_stub(size_t* group_id, size_t wi_id); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wi_start_stub(size_t *group_id, size_t wi_id, uint32_t wg_size); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wi_finish_stub(size_t *group_id, size_t wi_id); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wg_barrier_stub(uintptr_t barrier_id); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wi_resume_stub(size_t *group_id, size_t wi_id); #endif // __SPIR__ #endif // __LIBDEVICE_DEVICE_ITT_H__ diff --git a/libdevice/itt_cmplr_wrappers.cpp b/libdevice/itt_cmplr_wrappers.cpp index 8671ef02f7572..b30df40e61716 100644 --- a/libdevice/itt_cmplr_wrappers.cpp +++ b/libdevice/itt_cmplr_wrappers.cpp @@ -20,7 +20,8 @@ void __itt_spirv_wi_start_wrapper() { __spirv_BuiltInWorkgroupId.z}; size_t WIID = __spirv_BuiltInGlobalLinearId; uint32_t WGSize = static_cast(__spirv_BuiltInWorkgroupSize.x * - __spirv_BuiltInWorkgroupSize.y * __spirv_BuiltInWorkgroupSize.z); + __spirv_BuiltInWorkgroupSize.y * + __spirv_BuiltInWorkgroupSize.z); __itt_spirv_wi_start_stub(GroupID, WIID, WGSize); } diff --git a/libdevice/itt_stubs.cpp b/libdevice/itt_stubs.cpp index d4a73232cab20..e643847b15626 100644 --- a/libdevice/itt_stubs.cpp +++ b/libdevice/itt_stubs.cpp @@ -10,18 +10,16 @@ #ifdef __SPIR__ -DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void __itt_spirv_wi_start_stub( - size_t *group_id, size_t wi_id, uint32_t wg_size) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wi_start_stub(size_t *group_id, size_t wi_id, uint32_t wg_size) {} -DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void 
__itt_spirv_wi_finish_stub( - size_t *group_id, size_t wi_id) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wi_finish_stub(size_t *group_id, size_t wi_id) {} -DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void __itt_spirv_wg_barrier_stub(uintptr_t barrier_id) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wg_barrier_stub(uintptr_t barrier_id) {} -DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES -void __itt_spirv_wi_resume_stub(size_t* group_id, size_t wi_id) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_spirv_wi_resume_stub(size_t *group_id, size_t wi_id) {} #endif // __SPIR__ From 47a8102e92ca953bf6088cb1dd307ec32644a535 Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Mon, 1 Mar 2021 19:42:53 -0800 Subject: [PATCH 03/11] Removed \t. Signed-off-by: Vyacheslav Zakharin --- libdevice/cmake/modules/SYCLLibdevice.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 6a65efc2ec6e8..c53c1c2d389a0 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -236,7 +236,7 @@ install(FILES ${devicelib-obj-file} ${devicelib-obj-cmath} ${obj_binary_dir}/libsycl-fallback-cmath.${lib-suffix} ${devicelib-obj-cmath-fp64} - ${obj_binary_dir}/libsycl-fallback-cmath-fp64.${lib-suffix} + ${obj_binary_dir}/libsycl-fallback-cmath-fp64.${lib-suffix} ${devicelib-obj-itt-files} DESTINATION ${install_dest_lib} COMPONENT libsycldevice) From 391d0e3f756608dbaaed4235e4597e8da9912eeb Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Tue, 2 Mar 2021 12:02:40 -0800 Subject: [PATCH 04/11] Moved declarations to spirv_vars.h --- libdevice/spirv_vars.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 5a1a1d0d96705..1f75257d59026 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -16,27 +16,31 @@ #include 
#include +#define __SPIRV_VAR_QUALIFIERS EXTERN_C const typedef size_t size_t_vec __attribute__((ext_vector_type(3))); -extern "C" const size_t_vec __spirv_BuiltInGlobalInvocationId; -extern "C" const size_t_vec __spirv_BuiltInLocalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInGlobalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; +__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; -DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_x() { +static inline size_t __spirv_GlobalInvocationId_x() { return __spirv_BuiltInGlobalInvocationId.x; } -DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_y() { +static inline size_t __spirv_GlobalInvocationId_y() { return __spirv_BuiltInGlobalInvocationId.y; } -DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_z() { +static inline size_t __spirv_GlobalInvocationId_z() { return __spirv_BuiltInGlobalInvocationId.z; } -DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_x() { +static inline size_t __spirv_LocalInvocationId_x() { return __spirv_BuiltInLocalInvocationId.x; } -DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_y() { +static inline size_t __spirv_LocalInvocationId_y() { return __spirv_BuiltInLocalInvocationId.y; } -DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_z() { +static inline size_t __spirv_LocalInvocationId_z() { return __spirv_BuiltInLocalInvocationId.z; } From ea511ff3d0147cfbea5e857c9b4c12c6fd345aea Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Wed, 3 Mar 2021 08:21:33 -0800 Subject: [PATCH 05/11] Renamed itt_cmplr_wrappers.cpp Signed-off-by: Vyacheslav Zakharin --- libdevice/cmake/modules/SYCLLibdevice.cmake | 8 ++++---- .../{itt_cmplr_wrappers.cpp => itt_compiler_wrappers.cpp} | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) rename libdevice/{itt_cmplr_wrappers.cpp => 
itt_compiler_wrappers.cpp} (96%) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index c53c1c2d389a0..a6aaf63870aee 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -179,15 +179,15 @@ add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-wrappers.${lib-suffix} COMMAND ${clang} -fsycl -c ${compile_opts} ${sycl_targets_opt} - ${CMAKE_CURRENT_SOURCE_DIR}/itt_cmplr_wrappers.cpp - -o ${obj_binary_dir}/libsycl-itt-wrappers.${lib-suffix} - MAIN_DEPENDENCY itt_cmplr_wrappers.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/itt_compiler_wrappers.cpp + -o ${obj_binary_dir}/libsycl-itt-compiler-wrappers.${lib-suffix} + MAIN_DEPENDENCY itt_compiler_wrappers.cpp DEPENDS device_itt.h device.h clang clang-offload-bundler VERBATIM) set(devicelib-obj-itt-files ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} - ${obj_binary_dir}/libsycl-itt-wrappers.${lib-suffix} + ${obj_binary_dir}/libsycl-itt-compiler-wrappers.${lib-suffix} ) add_custom_target(libsycldevice-obj DEPENDS diff --git a/libdevice/itt_cmplr_wrappers.cpp b/libdevice/itt_compiler_wrappers.cpp similarity index 96% rename from libdevice/itt_cmplr_wrappers.cpp rename to libdevice/itt_compiler_wrappers.cpp index b30df40e61716..5228e2d298709 100644 --- a/libdevice/itt_cmplr_wrappers.cpp +++ b/libdevice/itt_compiler_wrappers.cpp @@ -1,4 +1,4 @@ -//==--- itt_cmplr_wrappers.cpp - compiler wtappers for ITT -----------------==// +//==--- itt_compiler_wrappers.cpp - compiler wrappers for ITT --------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From ab8c04c05c741898bfeba2ec8f7101cc686bfd45 Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Wed, 3 Mar 2021 10:32:06 -0800 Subject: [PATCH 06/11] Cleaned up names and added user wrappers. 
Signed-off-by: Vyacheslav Zakharin --- libdevice/cmake/modules/SYCLLibdevice.cmake | 16 +++- libdevice/device_itt.h | 90 +++++++++++++++++---- libdevice/itt_compiler_wrappers.cpp | 16 ++-- libdevice/itt_stubs.cpp | 21 ++++- libdevice/itt_user_wrappers.cpp | 69 ++++++++++++++++ 5 files changed, 180 insertions(+), 32 deletions(-) create mode 100644 libdevice/itt_user_wrappers.cpp diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index a6aaf63870aee..dbb75e31104e0 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -173,21 +173,31 @@ add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} ${CMAKE_CURRENT_SOURCE_DIR}/itt_stubs.cpp -o ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} MAIN_DEPENDENCY itt_stubs.cpp - DEPENDS device_itt.h device.h clang clang-offload-bundler + DEPENDS device_itt.h spirv_vars.h device.h clang clang-offload-bundler VERBATIM) -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-wrappers.${lib-suffix} +add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-compiler-wrappers.${lib-suffix} COMMAND ${clang} -fsycl -c ${compile_opts} ${sycl_targets_opt} ${CMAKE_CURRENT_SOURCE_DIR}/itt_compiler_wrappers.cpp -o ${obj_binary_dir}/libsycl-itt-compiler-wrappers.${lib-suffix} MAIN_DEPENDENCY itt_compiler_wrappers.cpp - DEPENDS device_itt.h device.h clang clang-offload-bundler + DEPENDS device_itt.h spirv_vars.h device.h clang clang-offload-bundler + VERBATIM) + +add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-itt-user-wrappers.${lib-suffix} + COMMAND ${clang} -fsycl -c + ${compile_opts} ${sycl_targets_opt} + ${CMAKE_CURRENT_SOURCE_DIR}/itt_user_wrappers.cpp + -o ${obj_binary_dir}/libsycl-itt-user-wrappers.${lib-suffix} + MAIN_DEPENDENCY itt_user_wrappers.cpp + DEPENDS device_itt.h spirv_vars.h device.h clang clang-offload-bundler VERBATIM) set(devicelib-obj-itt-files ${obj_binary_dir}/libsycl-itt-stubs.${lib-suffix} 
${obj_binary_dir}/libsycl-itt-compiler-wrappers.${lib-suffix} + ${obj_binary_dir}/libsycl-itt-user-wrappers.${lib-suffix} ) add_custom_target(libsycldevice-obj DEPENDS diff --git a/libdevice/device_itt.h b/libdevice/device_itt.h index 5805e6db5fa94..86f65afa0eaaf 100644 --- a/libdevice/device_itt.h +++ b/libdevice/device_itt.h @@ -12,18 +12,26 @@ #include "device.h" #ifdef __SPIR__ -#include -#include - -// Use SPIRV constants directly in place of OCL intrinsic functions. -#define __SPIRV_VAR_QUALIFIERS EXTERN_C const -typedef size_t size_t_vec __attribute__((ext_vector_type(3))); -__SPIRV_VAR_QUALIFIERS size_t __spirv_BuiltInGlobalLinearId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; -__SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; +#include "spirv_vars.h" #define ITT_STUB_ATTRIBUTES __attribute__((noinline, optnone)) +/// Atomic operation type +enum __itt_atomic_mem_op_t +{ + __itt_mem_load = 0, + __itt_mem_store = 1, + __itt_mem_update = 2 +}; + +/// Memory operation ordering semantic type +enum __itt_atomic_mem_order_t +{ + __itt_mem_order_relaxed = 0, + __itt_mem_order_acquire = 1, + __itt_mem_order_release = 2 +}; + // FIXME: must be enabled via -fdeclare-spirv-builtins DEVICE_EXTERN_C char __spirv_SpecConstant(int, char); @@ -34,25 +42,73 @@ static inline bool isITTEnabled() { } // Wrapper APIs that may be called by compiler-generated code. +// These are just parameterless helper APIs that call the corresponding +// stub APIs after preparing the arguments for them. +// +// Note that we do not provide compiler wrappers for all stub APIs. +// For example, there is no compiler wrapper for +// __itt_offload_sync_acquired_stub, since the API's parameter cannot +// be computed in the wrapper itself and has to be passed from outside. +// If a compiler needs to invoke such an API, it has to use the user +// visible API directly (i.e. __itt_offload_sync_acquired). 
DEVICE_EXTERN_C -void __itt_spirv_wi_start_wrapper(); +void __itt_offload_wi_start_wrapper(); DEVICE_EXTERN_C -void __itt_spirv_wi_finish_wrapper(); +void __itt_offload_wi_finish_wrapper(); DEVICE_EXTERN_C -void __itt_spirv_wg_barrier_wrapper(); +void __itt_offload_wg_barrier_wrapper(); DEVICE_EXTERN_C -void __itt_spirv_wi_resume_wrapper(); +void __itt_offload_wi_resume_wrapper(); // Non-inlinable and non-optimizable APIs that are recognized // by profiling tools. DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wi_start_stub(size_t *group_id, size_t wi_id, uint32_t wg_size); +__itt_offload_wi_start_stub(size_t *group_id, size_t wi_id, uint32_t wg_size); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_wi_finish_stub(size_t *group_id, size_t wi_id); DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wi_finish_stub(size_t *group_id, size_t wi_id); +__itt_offload_wg_barrier_stub(uintptr_t barrier_id); DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wg_barrier_stub(uintptr_t barrier_id); +__itt_offload_wi_resume_stub(size_t *group_id, size_t wi_id); DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wi_resume_stub(size_t *group_id, size_t wi_id); +__itt_offload_sync_acquired_stub(uintptr_t sync_id); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_sync_releasing_stub(uintptr_t sync_id); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_wg_local_range_stub(void* ptr, size_t size); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_atomic_op_start_stub(void* object, + __itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order); +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_atomic_op_finish_stub(void* object, + __itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order); + +// User visible APIs. These may called both from user code and from +// compiler generated code. 
+DEVICE_EXTERN_C void +__itt_offload_wi_start(size_t *group_id, size_t wi_id, uint32_t wg_size); +DEVICE_EXTERN_C void +__itt_offload_wi_finish(size_t *group_id, size_t wi_id); +DEVICE_EXTERN_C void +__itt_offload_wg_barrier(uintptr_t barrier_id); +DEVICE_EXTERN_C void +__itt_offload_wi_resume(size_t *group_id, size_t wi_id); +DEVICE_EXTERN_C void +__itt_offload_sync_acquired(uintptr_t sync_id); +DEVICE_EXTERN_C void +__itt_offload_sync_releasing(uintptr_t sync_id); +DEVICE_EXTERN_C void +__itt_offload_wg_local_range(void* ptr, size_t size); +DEVICE_EXTERN_C void +__itt_offload_atomic_op_start(void* object, + __itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order); +DEVICE_EXTERN_C void +__itt_offload_atomic_op_finish(void* object, + __itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order); #endif // __SPIR__ #endif // __LIBDEVICE_DEVICE_ITT_H__ diff --git a/libdevice/itt_compiler_wrappers.cpp b/libdevice/itt_compiler_wrappers.cpp index 5228e2d298709..f942cbc69ff97 100644 --- a/libdevice/itt_compiler_wrappers.cpp +++ b/libdevice/itt_compiler_wrappers.cpp @@ -11,7 +11,7 @@ #ifdef __SPIR__ DEVICE_EXTERN_C -void __itt_spirv_wi_start_wrapper() { +void __itt_offload_wi_start_wrapper() { if (!isITTEnabled()) return; @@ -22,11 +22,11 @@ void __itt_spirv_wi_start_wrapper() { uint32_t WGSize = static_cast(__spirv_BuiltInWorkgroupSize.x * __spirv_BuiltInWorkgroupSize.y * __spirv_BuiltInWorkgroupSize.z); - __itt_spirv_wi_start_stub(GroupID, WIID, WGSize); + __itt_offload_wi_start_stub(GroupID, WIID, WGSize); } DEVICE_EXTERN_C -void __itt_spirv_wi_finish_wrapper() { +void __itt_offload_wi_finish_wrapper() { if (!isITTEnabled()) return; @@ -34,19 +34,19 @@ void __itt_spirv_wi_finish_wrapper() { __spirv_BuiltInWorkgroupId.y, __spirv_BuiltInWorkgroupId.z}; size_t WIID = __spirv_BuiltInGlobalLinearId; - __itt_spirv_wi_finish_stub(GroupID, WIID); + __itt_offload_wi_finish_stub(GroupID, WIID); } DEVICE_EXTERN_C -void __itt_spirv_wg_barrier_wrapper() { +void 
__itt_offload_wg_barrier_wrapper() { if (!isITTEnabled()) return; - __itt_spirv_wg_barrier_stub(0); + __itt_offload_wg_barrier_stub(0); } DEVICE_EXTERN_C -void __itt_spirv_wi_resume_wrapper() { +void __itt_offload_wi_resume_wrapper() { if (!isITTEnabled()) return; @@ -54,7 +54,7 @@ void __itt_spirv_wi_resume_wrapper() { __spirv_BuiltInWorkgroupId.y, __spirv_BuiltInWorkgroupId.z}; size_t WIID = __spirv_BuiltInGlobalLinearId; - __itt_spirv_wi_resume_stub(GroupID, WIID); + __itt_offload_wi_resume_stub(GroupID, WIID); } #endif // __SPIR__ diff --git a/libdevice/itt_stubs.cpp b/libdevice/itt_stubs.cpp index e643847b15626..5fd429a6c369c 100644 --- a/libdevice/itt_stubs.cpp +++ b/libdevice/itt_stubs.cpp @@ -11,15 +11,28 @@ #ifdef __SPIR__ DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wi_start_stub(size_t *group_id, size_t wi_id, uint32_t wg_size) {} +__itt_offload_wi_start_stub(size_t *group_id, size_t wi_id, uint32_t wg_size) {} DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wi_finish_stub(size_t *group_id, size_t wi_id) {} +__itt_offload_wi_finish_stub(size_t *group_id, size_t wi_id) {} DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wg_barrier_stub(uintptr_t barrier_id) {} +__itt_offload_wg_barrier_stub(uintptr_t barrier_id) {} DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_spirv_wi_resume_stub(size_t *group_id, size_t wi_id) {} +__itt_offload_wi_resume_stub(size_t *group_id, size_t wi_id) {} + +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_sync_acquired_stub(uintptr_t sync_id) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_sync_releasing_stub(uintptr_t sync_id) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_wg_local_range_stub(void* ptr, size_t size) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_atomic_op_start_stub(void* object, __itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order) {} +DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void +__itt_offload_atomic_op_finish_stub(void* object, 
__itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order) {} #endif // __SPIR__ diff --git a/libdevice/itt_user_wrappers.cpp b/libdevice/itt_user_wrappers.cpp new file mode 100644 index 0000000000000..45fe73e160b46 --- /dev/null +++ b/libdevice/itt_user_wrappers.cpp @@ -0,0 +1,69 @@ +//==--- itt_user_wrappers.cpp - user visible functions for ITT ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "device_itt.h" + +#ifdef __SPIR__ + +DEVICE_EXTERN_C void +__itt_offload_wi_start(size_t *group_id, size_t wi_id, uint32_t wg_size) { + if (isITTEnabled()) + __itt_offload_wi_start_stub(group_id, wi_id, wg_size); +} + +DEVICE_EXTERN_C void +__itt_offload_wi_finish(size_t *group_id, size_t wi_id) { + if (isITTEnabled()) + __itt_offload_wi_finish_stub(group_id, wi_id); +} + +DEVICE_EXTERN_C void +__itt_offload_wg_barrier(uintptr_t barrier_id) { + if (isITTEnabled()) + __itt_offload_wg_barrier_stub(barrier_id); +} + +DEVICE_EXTERN_C void +__itt_offload_wi_resume(size_t *group_id, size_t wi_id) { + if (isITTEnabled()) + __itt_offload_wi_resume_stub(group_id, wi_id); +} + +DEVICE_EXTERN_C void +__itt_offload_sync_acquired(uintptr_t sync_id) { + if (isITTEnabled()) + __itt_offload_sync_acquired_stub(sync_id); +} + +DEVICE_EXTERN_C void +__itt_offload_sync_releasing(uintptr_t sync_id) { + if (isITTEnabled()) + __itt_offload_sync_releasing_stub(sync_id); +} + +DEVICE_EXTERN_C void +__itt_offload_wg_local_range(void* ptr, size_t size) { + if (isITTEnabled()) + __itt_offload_wg_local_range_stub(ptr, size); +} + +DEVICE_EXTERN_C void +__itt_offload_atomic_op_start(void* object, __itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order) { + if (isITTEnabled()) + 
__itt_offload_atomic_op_start_stub(object, op_type, mem_order); +} + +DEVICE_EXTERN_C void +__itt_offload_atomic_op_finish(void* object, __itt_atomic_mem_op_t op_type, + __itt_atomic_mem_order_t mem_order) { + if (isITTEnabled()) + __itt_offload_atomic_op_finish_stub(object, op_type, mem_order); +} + +#endif // __SPIR__ From 2d9c6c65e8f2e4ebb8374efa2a8045078f43ceac Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Wed, 3 Mar 2021 10:45:51 -0800 Subject: [PATCH 07/11] clang-format Signed-off-by: Vyacheslav Zakharin --- libdevice/device_itt.h | 54 +++++++++++++-------------------- libdevice/itt_stubs.cpp | 6 ++-- libdevice/itt_user_wrappers.cpp | 26 ++++++---------- 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/libdevice/device_itt.h b/libdevice/device_itt.h index 86f65afa0eaaf..ed1555424c95f 100644 --- a/libdevice/device_itt.h +++ b/libdevice/device_itt.h @@ -17,19 +17,17 @@ #define ITT_STUB_ATTRIBUTES __attribute__((noinline, optnone)) /// Atomic operation type -enum __itt_atomic_mem_op_t -{ - __itt_mem_load = 0, - __itt_mem_store = 1, - __itt_mem_update = 2 +enum __itt_atomic_mem_op_t { + __itt_mem_load = 0, + __itt_mem_store = 1, + __itt_mem_update = 2 }; /// Memory operation ordering semantic type -enum __itt_atomic_mem_order_t -{ - __itt_mem_order_relaxed = 0, - __itt_mem_order_acquire = 1, - __itt_mem_order_release = 2 +enum __itt_atomic_mem_order_t { + __itt_mem_order_relaxed = 0, + __itt_mem_order_acquire = 1, + __itt_mem_order_release = 2 }; // FIXME: must be enabled via -fdeclare-spirv-builtins @@ -75,39 +73,29 @@ __itt_offload_sync_acquired_stub(uintptr_t sync_id); DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void __itt_offload_sync_releasing_stub(uintptr_t sync_id); DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_offload_wg_local_range_stub(void* ptr, size_t size); +__itt_offload_wg_local_range_stub(void *ptr, size_t size); DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_offload_atomic_op_start_stub(void* object, - __itt_atomic_mem_op_t 
op_type, +__itt_offload_atomic_op_start_stub(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order); DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_offload_atomic_op_finish_stub(void* object, - __itt_atomic_mem_op_t op_type, +__itt_offload_atomic_op_finish_stub(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order); // User visible APIs. These may called both from user code and from // compiler generated code. +DEVICE_EXTERN_C void __itt_offload_wi_start(size_t *group_id, size_t wi_id, + uint32_t wg_size); +DEVICE_EXTERN_C void __itt_offload_wi_finish(size_t *group_id, size_t wi_id); +DEVICE_EXTERN_C void __itt_offload_wg_barrier(uintptr_t barrier_id); +DEVICE_EXTERN_C void __itt_offload_wi_resume(size_t *group_id, size_t wi_id); +DEVICE_EXTERN_C void __itt_offload_sync_acquired(uintptr_t sync_id); +DEVICE_EXTERN_C void __itt_offload_sync_releasing(uintptr_t sync_id); +DEVICE_EXTERN_C void __itt_offload_wg_local_range(void *ptr, size_t size); DEVICE_EXTERN_C void -__itt_offload_wi_start(size_t *group_id, size_t wi_id, uint32_t wg_size); -DEVICE_EXTERN_C void -__itt_offload_wi_finish(size_t *group_id, size_t wi_id); -DEVICE_EXTERN_C void -__itt_offload_wg_barrier(uintptr_t barrier_id); -DEVICE_EXTERN_C void -__itt_offload_wi_resume(size_t *group_id, size_t wi_id); -DEVICE_EXTERN_C void -__itt_offload_sync_acquired(uintptr_t sync_id); -DEVICE_EXTERN_C void -__itt_offload_sync_releasing(uintptr_t sync_id); -DEVICE_EXTERN_C void -__itt_offload_wg_local_range(void* ptr, size_t size); -DEVICE_EXTERN_C void -__itt_offload_atomic_op_start(void* object, - __itt_atomic_mem_op_t op_type, +__itt_offload_atomic_op_start(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order); DEVICE_EXTERN_C void -__itt_offload_atomic_op_finish(void* object, - __itt_atomic_mem_op_t op_type, +__itt_offload_atomic_op_finish(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order); #endif // 
__SPIR__ diff --git a/libdevice/itt_stubs.cpp b/libdevice/itt_stubs.cpp index 5fd429a6c369c..8eae2bf2d44dc 100644 --- a/libdevice/itt_stubs.cpp +++ b/libdevice/itt_stubs.cpp @@ -27,12 +27,12 @@ __itt_offload_sync_acquired_stub(uintptr_t sync_id) {} DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void __itt_offload_sync_releasing_stub(uintptr_t sync_id) {} DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_offload_wg_local_range_stub(void* ptr, size_t size) {} +__itt_offload_wg_local_range_stub(void *ptr, size_t size) {} DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_offload_atomic_op_start_stub(void* object, __itt_atomic_mem_op_t op_type, +__itt_offload_atomic_op_start_stub(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order) {} DEVICE_EXTERN_C ITT_STUB_ATTRIBUTES void -__itt_offload_atomic_op_finish_stub(void* object, __itt_atomic_mem_op_t op_type, +__itt_offload_atomic_op_finish_stub(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order) {} #endif // __SPIR__ diff --git a/libdevice/itt_user_wrappers.cpp b/libdevice/itt_user_wrappers.cpp index 45fe73e160b46..cedd2865eebb9 100644 --- a/libdevice/itt_user_wrappers.cpp +++ b/libdevice/itt_user_wrappers.cpp @@ -10,57 +10,51 @@ #ifdef __SPIR__ -DEVICE_EXTERN_C void -__itt_offload_wi_start(size_t *group_id, size_t wi_id, uint32_t wg_size) { +DEVICE_EXTERN_C void __itt_offload_wi_start(size_t *group_id, size_t wi_id, + uint32_t wg_size) { if (isITTEnabled()) __itt_offload_wi_start_stub(group_id, wi_id, wg_size); } -DEVICE_EXTERN_C void -__itt_offload_wi_finish(size_t *group_id, size_t wi_id) { +DEVICE_EXTERN_C void __itt_offload_wi_finish(size_t *group_id, size_t wi_id) { if (isITTEnabled()) __itt_offload_wi_finish_stub(group_id, wi_id); } -DEVICE_EXTERN_C void -__itt_offload_wg_barrier(uintptr_t barrier_id) { +DEVICE_EXTERN_C void __itt_offload_wg_barrier(uintptr_t barrier_id) { if (isITTEnabled()) __itt_offload_wg_barrier_stub(barrier_id); } -DEVICE_EXTERN_C void 
-__itt_offload_wi_resume(size_t *group_id, size_t wi_id) { +DEVICE_EXTERN_C void __itt_offload_wi_resume(size_t *group_id, size_t wi_id) { if (isITTEnabled()) __itt_offload_wi_resume_stub(group_id, wi_id); } -DEVICE_EXTERN_C void -__itt_offload_sync_acquired(uintptr_t sync_id) { +DEVICE_EXTERN_C void __itt_offload_sync_acquired(uintptr_t sync_id) { if (isITTEnabled()) __itt_offload_sync_acquired_stub(sync_id); } -DEVICE_EXTERN_C void -__itt_offload_sync_releasing(uintptr_t sync_id) { +DEVICE_EXTERN_C void __itt_offload_sync_releasing(uintptr_t sync_id) { if (isITTEnabled()) __itt_offload_sync_releasing_stub(sync_id); } -DEVICE_EXTERN_C void -__itt_offload_wg_local_range(void* ptr, size_t size) { +DEVICE_EXTERN_C void __itt_offload_wg_local_range(void *ptr, size_t size) { if (isITTEnabled()) __itt_offload_wg_local_range_stub(ptr, size); } DEVICE_EXTERN_C void -__itt_offload_atomic_op_start(void* object, __itt_atomic_mem_op_t op_type, +__itt_offload_atomic_op_start(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order) { if (isITTEnabled()) __itt_offload_atomic_op_start_stub(object, op_type, mem_order); } DEVICE_EXTERN_C void -__itt_offload_atomic_op_finish(void* object, __itt_atomic_mem_op_t op_type, +__itt_offload_atomic_op_finish(void *object, __itt_atomic_mem_op_t op_type, __itt_atomic_mem_order_t mem_order) { if (isITTEnabled()) __itt_offload_atomic_op_finish_stub(object, op_type, mem_order); From 089a80671201f191602ea9614481971456539d82 Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Fri, 5 Mar 2021 10:44:35 -0800 Subject: [PATCH 08/11] Revert changes that uncovered __spirv_GlobalInvocationId_x issue. 
Signed-off-by: Vyacheslav Zakharin --- libdevice/spirv_vars.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 1f75257d59026..eab02e7a860be 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -24,23 +24,30 @@ __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInLocalInvocationId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupId; __SPIRV_VAR_QUALIFIERS size_t_vec __spirv_BuiltInWorkgroupSize; -static inline size_t __spirv_GlobalInvocationId_x() { +// FIXME: change DEVICE_EXTERNAL to static and rename the functions, +// when #3311 is fixed. +// These are just internal functions used within libdevice. +// We must not intrude the __spirv "namespace", so we'd better +// use names like getGlobalInvocationIdX. +// Libdevice must not export these APIs either, but it currently +// exports them due to DEVICE_EXTERNAL. +DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_x() { return __spirv_BuiltInGlobalInvocationId.x; } -static inline size_t __spirv_GlobalInvocationId_y() { +DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_y() { return __spirv_BuiltInGlobalInvocationId.y; } -static inline size_t __spirv_GlobalInvocationId_z() { +DEVICE_EXTERNAL inline size_t __spirv_GlobalInvocationId_z() { return __spirv_BuiltInGlobalInvocationId.z; } -static inline size_t __spirv_LocalInvocationId_x() { +DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_x() { return __spirv_BuiltInLocalInvocationId.x; } -static inline size_t __spirv_LocalInvocationId_y() { +DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_y() { return __spirv_BuiltInLocalInvocationId.y; } -static inline size_t __spirv_LocalInvocationId_z() { +DEVICE_EXTERNAL inline size_t __spirv_LocalInvocationId_z() { return __spirv_BuiltInLocalInvocationId.z; } From b5b3ff1240ed2a3d365ca6b6538f169ea42d32ee Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Tue, 9 Mar 2021 11:46:57 -0800 
Subject: [PATCH 09/11] Added documentation. Signed-off-by: Vyacheslav Zakharin --- .../ITTAnnotations/ITTAnnotations.rst | 79 +++++++++++++++++++ sycl/doc/extensions/README.md | 1 + 2 files changed, 80 insertions(+) create mode 100644 sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst diff --git a/sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst b/sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst new file mode 100644 index 0000000000000..67fcc65dd68ad --- /dev/null +++ b/sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst @@ -0,0 +1,79 @@ +ITT annotations support +======================= + +This extension enables a set of functions implementing +the Instrumentation and Tracing Technology (ITT) functionality +in SYCL device code. + +There are three sets of functions defined by this extension, +and they serve different purposes. + +User APIs +--------- + +The user code calling these functions must include the corresponding header +file(s) provided by `ittnotify` project (TBD: reference ITT repo here). + +These functions are named using `__itt_notify_` prefix. + +Stub APIs +--------- + +These functions are not defined in any header file, and their declarations +follow exactly the declarations of the corresponding user APIs, except that +they have an extra `_stub` suffix in their names. + +These functions implement the ITT functionality in a way that allows +the tools, such as Intel(R) Inspector, to recognize the ITT annotations +and run their analysis methods based on that. + +For SYCL device code these functions are implemented as `noinline` and `optnone` +functions so that the corresponding calls may be distinguished in the execution +trace. This is just one way for implementing them, and the actual implementation +may change in future. + +Compiler wrapper APIs +--------------------- + +These functions are not defined in any header file, and they are supposed +to be called from the compiler generated code. 
These thin wrappers +just provide a convenient way for compilers to produce ITT annotations +without generating too much code in the compilers' IR. + +These functions have `_wrapper` suffix in their names. + +Example +~~~~~~~ + +.. code:: c++ + DEVICE_EXTERN_C void __itt_offload_wi_start_stub( + size_t[3], size_t, uint32_t); + + DEVICE_EXTERN_C void __itt_offload_wi_start_wrapper() { + if (__spirv_SpecConstant(0xFF747469, 0)) { + size_t GroupID[3] = ...; + size_t WIId = ...; + uint32_t WGSize = ...; + __itt_offload_wi_start_stub(GroupID, WIId, WGSize); + } + } + +A compiler may generate a simple call to `__itt_offload_wi_start_wrapper` +to annotate a kernel entry point. Compare this to the code inside the wrapper +function, which a compiler would have to generate if there were no such +a wrapper. + +Conditional compilation +----------------------- + +To minimize the effect of ITT annotations on the performance of the device code, +the implementation is guarded with a specialization constant check. This allows +users and tools to have one version of the annotated code that may be built +with and without ITT annotations "enabled". When the ITT annotations are not +enabled, we expect that the overall effect of the annotations will be minimized +by the dead code elimination optimization(s) made by the device compilers. + +For this purpose we reserve a 1-byte specialization constant numbered +`4285822057` (`0xFF747469`). The users/tools/runtimes should set this +specialization constant to non-zero value to enable the ITT annotations +in SYCL device code. 
diff --git a/sycl/doc/extensions/README.md b/sycl/doc/extensions/README.md index 1db502a579f93..e4b4a7bdb52be 100755 --- a/sycl/doc/extensions/README.md +++ b/sycl/doc/extensions/README.md @@ -36,6 +36,7 @@ DPC++ extensions status: | [Unified Shared Memory](USM/USM.adoc) | Supported(OpenCL) | | | [Use Pinned Memory Property](UsePinnedMemoryProperty/UsePinnedMemoryPropery.adoc) | Supported | | | [Level-Zero backend specification](LevelZeroBackend/LevelZeroBackend.md) | Supported | | +| [ITT annotations support](ITTAnnotations/ITTAnnotations.rst) | Supported | | Legend: From e80f1b44cd392105b550706768ba481f5de772f3 Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Tue, 9 Mar 2021 16:08:25 -0800 Subject: [PATCH 10/11] Formatting fixed. Signed-off-by: Vyacheslav Zakharin --- .../ITTAnnotations/ITTAnnotations.rst | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst b/sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst index 67fcc65dd68ad..0b72d138549ce 100644 --- a/sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst +++ b/sycl/doc/extensions/ITTAnnotations/ITTAnnotations.rst @@ -12,25 +12,25 @@ User APIs --------- The user code calling these functions must include the corresponding header -file(s) provided by `ittnotify` project (TBD: reference ITT repo here). +file(s) provided by ``ittnotify`` project (TBD: reference ITT repo here). -These functions are named using `__itt_notify_` prefix. +These functions are named using ``__itt_notify_`` prefix. Stub APIs --------- These functions are not defined in any header file, and their declarations follow exactly the declarations of the corresponding user APIs, except that -they have an extra `_stub` suffix in their names. +they have an extra ``_stub`` suffix in their names. 
These functions implement the ITT functionality in a way that allows the tools, such as Intel(R) Inspector, to recognize the ITT annotations and run their analysis methods based on that. -For SYCL device code these functions are implemented as `noinline` and `optnone` -functions so that the corresponding calls may be distinguished in the execution -trace. This is just one way for implementing them, and the actual implementation -may change in future. +For SYCL device code these functions are implemented as ``noinline`` and +``optnone`` functions so that the corresponding calls may be distinguished +in the execution trace. This is just one way for implementing them, +and the actual implementation may change in future. Compiler wrapper APIs --------------------- @@ -40,7 +40,7 @@ to be called from the compiler generated code. These thin wrappers just provide a convenient way for compilers to produce ITT annotations without generating too much code in the compilers' IR. -These functions have `_wrapper` suffix in their names. +These functions have ``_wrapper`` suffix in their names. Example ~~~~~~~ @@ -58,7 +58,7 @@ Example } } -A compiler may generate a simple call to `__itt_offload_wi_start_wrapper` +A compiler may generate a simple call to ``__itt_offload_wi_start_wrapper`` to annotate a kernel entry point. Compare this to the code inside the wrapper function, which a compiler would have to generate if there were no such a wrapper. @@ -74,6 +74,6 @@ enabled, we expect that the overall effect of the annotations will be minimized by the dead code elimination optimization(s) made by the device compilers. For this purpose we reserve a 1-byte specialization constant numbered -`4285822057` (`0xFF747469`). The users/tools/runtimes should set this +``4285822057`` (``0xFF747469``). The users/tools/runtimes should set this specialization constant to non-zero value to enable the ITT annotations in SYCL device code. 
From 0c091bb19f7dc787339b4dbcdcd04eed5619db02 Mon Sep 17 00:00:00 2001 From: Vyacheslav Zakharin Date: Wed, 10 Mar 2021 07:05:32 -0800 Subject: [PATCH 11/11] Updated __itt_atomic_mem_order_t enum. Signed-off-by: Vyacheslav Zakharin --- libdevice/device_itt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libdevice/device_itt.h b/libdevice/device_itt.h index ed1555424c95f..07614c6c532a6 100644 --- a/libdevice/device_itt.h +++ b/libdevice/device_itt.h @@ -27,7 +27,8 @@ enum __itt_atomic_mem_op_t { enum __itt_atomic_mem_order_t { __itt_mem_order_relaxed = 0, __itt_mem_order_acquire = 1, - __itt_mem_order_release = 2 + __itt_mem_order_release = 2, + __itt_mem_order_acquire_release = 3 }; // FIXME: must be enabled via -fdeclare-spirv-builtins