Skip to content

Commit 76c0736

Browse files
jhuber6 authored and ronlieb committed
Statically link all plugin runtimes llvm#87009
Summary: This patch overhauls the libomptarget and plugin interface. Currently, we define a C API and compile each plugin as a separate shared library. Then, libomptarget loads these API functions and forwards its internal calls to them. This was originally designed to allow multiple implementations of a library to be live. However, since then no one has used this functionality and it prevents us from using much nicer interfaces. If the old behavior is desired, it should instead be implemented as a separate plugin. This patch replaces the PluginAdaptorTy interface with the GenericPluginTy that is used by the plugins. Each plugin exports a createPlugin_<name> function that is used to get the specific implementation. This code is now shared with libomptarget. There are some notable improvements to this: massively improved lifetimes of live runtime objects; the plugins can use a C++ interface; global state does not need to be duplicated for each plugin + libomptarget; it is easier to use, add features, and improve error handling; and there is less function call overhead / improved LTO performance. Additional changes in this patch are related to contending with the fact that state is now shared. Initialization and deinitialization are now handled correctly and in phase with the underlying runtime, allowing us to actually know when something is getting deallocated. Change-Id: I50c434d194f8922787b1075a07dad48425b87c95
1 parent d3f703a commit 76c0736

File tree

29 files changed

+689
-1335
lines changed

29 files changed

+689
-1335
lines changed

clang/test/Driver/linker-wrapper-image.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030

3131
// OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
3232
// OPENMP-NEXT: entry:
33-
// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
3433
// OPENMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
34+
// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
3535
// OPENMP-NEXT: ret void
3636
// OPENMP-NEXT: }
3737

llvm/lib/Frontend/Offloading/OffloadWrapper.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,12 +232,13 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
232232
// Construct function body
233233
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
234234

235+
Builder.CreateCall(RegFuncC, BinDesc);
236+
235237
// Register the destructors with 'atexit'. This is expected by the CUDA
236238
// runtime and ensures that we clean up before dynamic objects are destroyed.
237-
// This needs to be done before the runtime is called and registers its own.
239+
// This needs to be done after plugin initialization to ensure that it is
240+
// called before the plugin runtime is destroyed.
238241
Builder.CreateCall(AtExit, UnregFunc);
239-
240-
Builder.CreateCall(RegFuncC, BinDesc);
241242
Builder.CreateRetVoid();
242243

243244
// Add this function to constructors.

offload/include/PluginManager.h

Lines changed: 16 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
#ifndef OMPTARGET_PLUGIN_MANAGER_H
1414
#define OMPTARGET_PLUGIN_MANAGER_H
1515

16+
#include "PluginInterface.h"
17+
1618
#include "DeviceImage.h"
1719
#include "ExclusiveAccess.h"
1820
#include "Shared/APITypes.h"
19-
#include "Shared/PluginAPI.h"
2021
#include "Shared/Requirements.h"
2122

2223
#include "device.h"
@@ -34,38 +35,7 @@
3435
#include <mutex>
3536
#include <string>
3637

37-
struct PluginManager;
38-
39-
/// Plugin adaptors should be created via `PluginAdaptorTy::create` which will
40-
/// invoke the constructor and call `PluginAdaptorTy::init`. Eventual errors are
41-
/// reported back to the caller, otherwise a valid and initialized adaptor is
42-
/// returned.
43-
struct PluginAdaptorTy {
44-
/// Try to create a plugin adaptor from a filename.
45-
static llvm::Expected<std::unique_ptr<PluginAdaptorTy>>
46-
create(const std::string &Name);
47-
48-
/// Name of the shared object file representing the plugin.
49-
std::string Name;
50-
51-
/// Access to the shared object file representing the plugin.
52-
std::unique_ptr<llvm::sys::DynamicLibrary> LibraryHandler;
53-
54-
#define PLUGIN_API_HANDLE(NAME) \
55-
using NAME##_ty = decltype(__tgt_rtl_##NAME); \
56-
NAME##_ty *NAME = nullptr;
57-
58-
#include "Shared/PluginAPI.inc"
59-
#undef PLUGIN_API_HANDLE
60-
61-
/// Create a plugin adaptor for filename \p Name with a dynamic library \p DL.
62-
PluginAdaptorTy(const std::string &Name,
63-
std::unique_ptr<llvm::sys::DynamicLibrary> DL);
64-
65-
/// Initialize the plugin adaptor, this can fail in which case the adaptor is
66-
/// useless.
67-
llvm::Error init();
68-
};
38+
using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;
6939

7040
/// Struct for the data required to handle plugins
7141
struct PluginManager {
@@ -80,6 +50,8 @@ struct PluginManager {
8050

8151
void init();
8252

53+
void deinit();
54+
8355
// Register a shared library with all (compatible) RTLs.
8456
void registerLib(__tgt_bin_desc *Desc);
8557

@@ -92,10 +64,9 @@ struct PluginManager {
9264
std::make_unique<DeviceImageTy>(TgtBinDesc, TgtDeviceImage));
9365
}
9466

95-
/// Initialize as many devices as possible for this plugin adaptor. Devices
96-
/// that fail to initialize are ignored. Returns the offset the devices were
97-
/// registered at.
98-
void initDevices(PluginAdaptorTy &RTL);
67+
/// Initialize as many devices as possible for this plugin. Devices that fail
68+
/// to initialize are ignored.
69+
void initDevices(GenericPluginTy &RTL);
9970

10071
/// Return the device presented to the user as device \p DeviceNo if it is
10172
/// initialized and ready. Otherwise return an error explaining the problem.
@@ -151,8 +122,8 @@ struct PluginManager {
151122
// Initialize all plugins.
152123
void initAllPlugins();
153124

154-
/// Iterator range for all plugin adaptors (in use or not, but always valid).
155-
auto pluginAdaptors() { return llvm::make_pointee_range(PluginAdaptors); }
125+
/// Iterator range for all plugins (in use or not, but always valid).
126+
auto plugins() { return llvm::make_pointee_range(Plugins); }
156127

157128
/// Return the user provided requirements.
158129
int64_t getRequirements() const { return Requirements.getRequirements(); }
@@ -164,14 +135,14 @@ struct PluginManager {
164135
bool RTLsLoaded = false;
165136
llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc;
166137

167-
// List of all plugin adaptors, in use or not.
168-
llvm::SmallVector<std::unique_ptr<PluginAdaptorTy>> PluginAdaptors;
138+
// List of all plugins, in use or not.
139+
llvm::SmallVector<std::unique_ptr<GenericPluginTy>> Plugins;
169140

170-
// Mapping of plugin adaptors to offsets in the device table.
171-
llvm::DenseMap<const PluginAdaptorTy *, int32_t> DeviceOffsets;
141+
// Mapping of plugins to offsets in the device table.
142+
llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceOffsets;
172143

173-
// Mapping of plugin adaptors to the number of used devices.
174-
llvm::DenseMap<const PluginAdaptorTy *, int32_t> DeviceUsed;
144+
// Mapping of plugins to the number of used devices.
145+
llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceUsed;
175146

176147
// Set of all device images currently in use.
177148
llvm::DenseSet<const __tgt_device_image *> UsedImages;

offload/include/device.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,17 @@
3333
#include "llvm/ADT/DenseMap.h"
3434
#include "llvm/ADT/SmallVector.h"
3535

36+
#include "PluginInterface.h"
37+
38+
using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;
39+
3640
// Forward declarations.
37-
struct PluginAdaptorTy;
3841
struct __tgt_bin_desc;
3942
struct __tgt_target_table;
4043

4144
struct DeviceTy {
4245
int32_t DeviceID;
43-
PluginAdaptorTy *RTL;
46+
GenericPluginTy *RTL;
4447
int32_t RTLDeviceID;
4548
/// The physical number of processors that may concurrently execute a team
4649
/// For cuda, this is number of SMs, for amdgcn, this is number of CUs.
@@ -51,7 +54,7 @@ struct DeviceTy {
5154
/// Controlled via environment flag OMPX_FORCE_SYNC_REGIONS
5255
bool ForceSynchronousTargetRegions = false;
5356

54-
DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID);
57+
DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID);
5558
// DeviceTy is not copyable
5659
DeviceTy(const DeviceTy &D) = delete;
5760
DeviceTy &operator=(const DeviceTy &D) = delete;

offload/plugins-nextgen/CMakeLists.txt

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
set(common_dir ${CMAKE_CURRENT_SOURCE_DIR}/common)
1515
add_subdirectory(common)
1616
function(add_target_library target_name lib_name)
17-
add_llvm_library(${target_name} SHARED
17+
add_llvm_library(${target_name} STATIC
1818
LINK_COMPONENTS
1919
${LLVM_TARGETS_TO_BUILD}
2020
AggressiveInstCombine
@@ -45,27 +45,17 @@ function(add_target_library target_name lib_name)
4545
)
4646

4747
llvm_update_compile_flags(${target_name})
48+
target_include_directories(${target_name} PUBLIC ${common_dir}/include)
49+
if(OMPT_TARGET_DEFAULT AND LIBOMPTARGET_OMPT_SUPPORT)
50+
target_include_directories(${target_name} PUBLIC ${common_dir}/OMPT)
51+
endif()
4852
target_link_libraries(${target_name} PRIVATE
4953
PluginCommon ${llvm_libs} ${OPENMP_PTHREAD_LIB})
5054

5155
target_compile_definitions(${target_name} PRIVATE TARGET_NAME=${lib_name})
5256
target_compile_definitions(${target_name} PRIVATE
5357
DEBUG_PREFIX="TARGET ${lib_name} RTL")
54-
55-
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
56-
# On FreeBSD, the 'environ' symbol is undefined at link time, but resolved by
57-
# the dynamic linker at runtime. Therefore, allow the symbol to be undefined
58-
# when creating a shared library.
59-
target_link_libraries(${target_name} PRIVATE "-Wl,--allow-shlib-undefined")
60-
else()
61-
target_link_libraries(${target_name} PRIVATE "-Wl,-z,defs")
62-
endif()
63-
64-
if(LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
65-
target_link_libraries(${target_name} PRIVATE
66-
"-Wl,--version-script=${common_dir}/../exports")
67-
endif()
68-
set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET protected)
58+
set_target_properties(${target_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
6959
endfunction()
7060

7161
foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD)

offload/plugins-nextgen/amdgpu/CMakeLists.txt

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,3 @@ else()
7676
libomptarget_say("Not generating AMDGPU tests, no supported devices detected."
7777
" Use 'LIBOMPTARGET_FORCE_AMDGPU_TESTS' to override.")
7878
endif()
79-
80-
# Install plugin under the lib destination folder.
81-
install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OFFLOAD_INSTALL_LIBDIR}")
82-
if(NOT DEFINED CMAKE_INSTALL_RPATH)
83-
set_target_properties(omptarget.rtl.amdgpu PROPERTIES
84-
INSTALL_RPATH "$ORIGIN")
85-
endif()
86-
87-
set_target_properties(omptarget.rtl.amdgpu PROPERTIES
88-
BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
89-
CXX_VISIBILITY_PRESET protected)

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
688688
WGSizeName += "_wg_size";
689689
GlobalTy HostConstWGSize(WGSizeName, sizeof(decltype(ConstWGSize)),
690690
&ConstWGSize);
691-
GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
691+
GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler();
692692
if (auto Err =
693693
GHandler.readGlobalFromImage(Device, AMDImage, HostConstWGSize)) {
694694
// In case it is not found, we simply stick with the defaults.
@@ -2911,7 +2911,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29112911
if (!AMDGPUKernel)
29122912
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
29132913

2914-
new (AMDGPUKernel) AMDGPUKernelTy(Name, PluginTy::get().getGlobalHandler());
2914+
new (AMDGPUKernel) AMDGPUKernelTy(Name, Plugin.getGlobalHandler());
29152915

29162916
return *AMDGPUKernel;
29172917
}
@@ -4274,10 +4274,6 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
42744274
UInt32Envar KernTrace("LIBOMPTARGET_KERNEL_TRACE", 0);
42754275
llvm::omp::target::plugin::PrintKernelTrace = KernTrace.get();
42764276

4277-
#ifdef OMPT_SUPPORT
4278-
ompt::connectLibrary();
4279-
#endif
4280-
42814277
// Register event handler to detect memory errors on the devices.
42824278
Status = hsa_amd_register_system_event_handler(eventHandler, nullptr);
42834279
if (auto Err = Plugin::check(
@@ -4366,6 +4362,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
43664362

43674363
Triple::ArchType getTripleArch() const override { return Triple::amdgcn; }
43684364

4365+
const char *getName() const override { return GETNAME(TARGET_NAME); }
4366+
43694367
/// Get the ELF code for recognizing the compatible image binary.
43704368
uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
43714369

@@ -4685,8 +4683,6 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
46854683
return Plugin::success();
46864684
}
46874685

4688-
GenericPluginTy *PluginTy::createPlugin() { return new AMDGPUPluginTy(); }
4689-
46904686
template <typename... ArgsTy>
46914687
static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
46924688
hsa_status_t ResultCode = static_cast<hsa_status_t>(Code);
@@ -4779,17 +4775,22 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
47794775
namespace llvm::omp::target::plugin {
47804776

47814777
/// Enable/disable kernel profiling for the given device.
4782-
void setOmptQueueProfile(int DeviceId, int Enable) {
4783-
AMDGPUPluginTy &Plugin = PluginTy::get<AMDGPUPluginTy>();
4784-
static_cast<AMDGPUDeviceTy &>(Plugin.getDevice(DeviceId))
4785-
.setOmptQueueProfile(Enable);
4778+
void setOmptQueueProfile(void *Device, int Enable) {
4779+
reinterpret_cast<llvm::omp::target::plugin::AMDGPUDeviceTy *>(Device)
4780+
->setOmptQueueProfile(Enable);
47864781
}
47874782

47884783
} // namespace llvm::omp::target::plugin
47894784

47904785
/// Enable/disable kernel profiling for the given device.
4791-
void setGlobalOmptKernelProfile(int DeviceId, int Enable) {
4792-
llvm::omp::target::plugin::setOmptQueueProfile(DeviceId, Enable);
4786+
void setGlobalOmptKernelProfile(void *Device, int Enable) {
4787+
llvm::omp::target::plugin::setOmptQueueProfile(Device, Enable);
47934788
}
47944789

47954790
#endif
4791+
4792+
extern "C" {
4793+
llvm::omp::target::plugin::GenericPluginTy *createPlugin_amdgpu() {
4794+
return new llvm::omp::target::plugin::AMDGPUPluginTy();
4795+
}
4796+
}

offload/plugins-nextgen/common/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,4 @@ target_include_directories(PluginCommon PUBLIC
6666
${LIBOMPTARGET_INCLUDE_DIR}
6767
)
6868

69-
set_target_properties(PluginCommon PROPERTIES
70-
POSITION_INDEPENDENT_CODE ON
71-
CXX_VISIBILITY_PRESET protected)
69+
set_target_properties(PluginCommon PROPERTIES POSITION_INDEPENDENT_CODE ON)

offload/plugins-nextgen/common/OMPT/OmptDeviceTracing.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
#define DEBUG_PREFIX "OMPT"
2929

3030
extern void setOmptAsyncCopyProfile(bool Enable);
31-
extern void setGlobalOmptKernelProfile(int DeviceId, int Enable);
31+
extern void setGlobalOmptKernelProfile(void *Device, int Enable);
3232
extern uint64_t getSystemTimestampInNs();
3333

3434
namespace llvm {

offload/plugins-nextgen/common/OMPT/OmptTracing.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,6 @@ double llvm::omp::target::ompt::HostToDeviceOffset = .0;
5353

5454
std::map<ompt_device_t *, int32_t> llvm::omp::target::ompt::Devices;
5555

56-
std::atomic<uint64_t> llvm::omp::target::ompt::TracingTypesEnabled{0};
57-
58-
bool llvm::omp::target::ompt::TracingActive = false;
59-
60-
void llvm::omp::target::ompt::setTracingState(bool State) {
61-
TracingActive = State;
62-
}
63-
6456
std::shared_ptr<llvm::sys::DynamicLibrary>
6557
llvm::omp::target::ompt::getParentLibrary() {
6658
static bool ParentLibraryAssigned = false;
@@ -141,7 +133,7 @@ ompt_start_trace(ompt_device_t *Device, ompt_callback_buffer_request_t Request,
141133
setOmptAsyncCopyProfile(/*Enable=*/true);
142134
// Enable queue dispatch profiling
143135
if (DeviceId >= 0)
144-
setGlobalOmptKernelProfile(DeviceId, /*Enable=*/1);
136+
setGlobalOmptKernelProfile(Device, /*Enable=*/1);
145137
else
146138
REPORT("May not enable kernel profiling for invalid device id=%d\n",
147139
DeviceId);
@@ -179,7 +171,7 @@ OMPT_API_ROUTINE int ompt_stop_trace(ompt_device_t *Device) {
179171
// Disable queue dispatch profiling
180172
int DeviceId = getDeviceId(Device);
181173
if (DeviceId >= 0)
182-
setGlobalOmptKernelProfile(DeviceId, /*Enable=*/0);
174+
setGlobalOmptKernelProfile(Device, /*Enable=*/0);
183175
else
184176
REPORT("May not disable kernel profiling for invalid device id=%d\n",
185177
DeviceId);

0 commit comments

Comments
 (0)