Skip to content

Commit 4d61a9b

Browse files
[Offload] Add MPI Proxy Plugin
Co-authored-by: Guilherme Valarini <[email protected]>
1 parent dbb4858 commit 4d61a9b

32 files changed

+4309
-57
lines changed

offload/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ if(DEFINED LIBOMPTARGET_BUILD_CUDA_PLUGIN OR
139139
message(WARNING "Option removed, use 'LIBOMPTARGET_PLUGINS_TO_BUILD' instead")
140140
endif()
141141

142-
set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host)
142+
set(LIBOMPTARGET_ALL_PLUGIN_TARGETS mpi amdgpu cuda host)
143143
# Cache option naming the plugins to build; the help text lists every entry of
# LIBOMPTARGET_ALL_PLUGIN_TARGETS (mpi, amdgpu, cuda, host).
set(LIBOMPTARGET_PLUGINS_TO_BUILD "all" CACHE STRING
144144
"Semicolon-separated list of plugins to use: mpi, cuda, amdgpu, host or \"all\".")
145145

@@ -194,8 +194,10 @@ set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-g
194194
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} powerpc64-ibm-linux-gnu-LTO")
195195
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-unknown-linux-gnu")
196196
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-unknown-linux-gnu-LTO")
197+
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-unknown-linux-gnu-mpi")
197198
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
198199
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
200+
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-mpi")
199201
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-JIT-LTO")
200202
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} s390x-ibm-linux-gnu")
201203
set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} s390x-ibm-linux-gnu-LTO")
@@ -341,6 +343,8 @@ set(LIBOMPTARGET_LLVM_LIBRARY_DIR "${LLVM_LIBRARY_DIR}" CACHE STRING
341343
set(LIBOMPTARGET_LLVM_LIBRARY_INTDIR "${LIBOMPTARGET_INTDIR}" CACHE STRING
342344
"Path to folder where intermediate libraries will be output")
343345

346+
set(LIBOMPTARGET_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
347+
344348
# Build offloading plugins and device RTLs if they are available.
345349
add_subdirectory(plugins-nextgen)
346350
add_subdirectory(DeviceRTL)

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 63 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,130 +1208,141 @@ struct GenericPluginTy {
12081208

12091209
/// Returns non-zero if the \p Image is compatible with the plugin. This
12101210
/// function does not require the plugin to be initialized before use.
1211-
int32_t is_plugin_compatible(__tgt_device_image *Image);
1211+
virtual int32_t is_plugin_compatible(__tgt_device_image *Image);
12121212

12131213
/// Returns non-zero if the \p Image is compatible with the device.
1214-
int32_t is_device_compatible(int32_t DeviceId, __tgt_device_image *Image);
1214+
virtual int32_t is_device_compatible(int32_t DeviceId,
1215+
__tgt_device_image *Image);
12151216

12161217
/// Returns non-zero if the plugin device has been initialized.
1217-
int32_t is_device_initialized(int32_t DeviceId) const;
1218+
virtual int32_t is_device_initialized(int32_t DeviceId) const;
12181219

12191220
/// Initialize the device inside of the plugin.
1220-
int32_t init_device(int32_t DeviceId);
1221+
virtual int32_t init_device(int32_t DeviceId);
12211222

12221223
/// Return the number of devices this plugin can support.
1223-
int32_t number_of_devices();
1224+
virtual int32_t number_of_devices();
12241225

12251226
/// Returns non-zero if the data can be exchanged between the two devices.
1226-
int32_t is_data_exchangable(int32_t SrcDeviceId, int32_t DstDeviceId);
1227+
virtual int32_t is_data_exchangable(int32_t SrcDeviceId, int32_t DstDeviceId);
12271228

12281229
/// Initializes the record and replay mechanism inside the plugin.
1229-
int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
1230-
void *VAddr, bool isRecord, bool SaveOutput,
1231-
uint64_t &ReqPtrArgOffset);
1230+
virtual int32_t initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
1231+
void *VAddr, bool isRecord,
1232+
bool SaveOutput,
1233+
uint64_t &ReqPtrArgOffset);
12321234

12331235
/// Loads the associated binary into the plugin and returns a handle to it.
1234-
int32_t load_binary(int32_t DeviceId, __tgt_device_image *TgtImage,
1235-
__tgt_device_binary *Binary);
1236+
virtual int32_t load_binary(int32_t DeviceId, __tgt_device_image *TgtImage,
1237+
__tgt_device_binary *Binary);
12361238

12371239
/// Allocates memory that is accessible to the given device.
1238-
void *data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, int32_t Kind);
1240+
virtual void *data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
1241+
int32_t Kind);
12391242

12401243
/// Deallocates memory on the given device.
1241-
int32_t data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind);
1244+
virtual int32_t data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind);
12421245

12431246
/// Locks / pins host memory using the plugin runtime.
1244-
int32_t data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
1245-
void **LockedPtr);
1247+
virtual int32_t data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
1248+
void **LockedPtr);
12461249

12471250
/// Unlocks / unpins host memory using the plugin runtime.
1248-
int32_t data_unlock(int32_t DeviceId, void *Ptr);
1251+
virtual int32_t data_unlock(int32_t DeviceId, void *Ptr);
12491252

12501253
/// Notify the runtime about a new mapping that has been created outside.
1251-
int32_t data_notify_mapped(int32_t DeviceId, void *HstPtr, int64_t Size);
1254+
virtual int32_t data_notify_mapped(int32_t DeviceId, void *HstPtr,
1255+
int64_t Size);
12521256

12531257
/// Notify the runtime about a mapping that has been deleted.
1254-
int32_t data_notify_unmapped(int32_t DeviceId, void *HstPtr);
1258+
virtual int32_t data_notify_unmapped(int32_t DeviceId, void *HstPtr);
12551259

12561260
/// Copy data to the given device.
1257-
int32_t data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
1258-
int64_t Size);
1261+
virtual int32_t data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
1262+
int64_t Size);
12591263

12601264
/// Copy data to the given device asynchronously.
1261-
int32_t data_submit_async(int32_t DeviceId, void *TgtPtr, void *HstPtr,
1262-
int64_t Size, __tgt_async_info *AsyncInfoPtr);
1265+
virtual int32_t data_submit_async(int32_t DeviceId, void *TgtPtr,
1266+
void *HstPtr, int64_t Size,
1267+
__tgt_async_info *AsyncInfoPtr);
12631268

12641269
/// Copy data from the given device.
1265-
int32_t data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
1266-
int64_t Size);
1270+
virtual int32_t data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
1271+
int64_t Size);
12671272

12681273
/// Copy data from the given device asynchronously.
1269-
int32_t data_retrieve_async(int32_t DeviceId, void *HstPtr, void *TgtPtr,
1270-
int64_t Size, __tgt_async_info *AsyncInfoPtr);
1274+
virtual int32_t data_retrieve_async(int32_t DeviceId, void *HstPtr,
1275+
void *TgtPtr, int64_t Size,
1276+
__tgt_async_info *AsyncInfoPtr);
12711277

12721278
/// Exchange memory addresses between two devices.
1273-
int32_t data_exchange(int32_t SrcDeviceId, void *SrcPtr, int32_t DstDeviceId,
1274-
void *DstPtr, int64_t Size);
1279+
virtual int32_t data_exchange(int32_t SrcDeviceId, void *SrcPtr,
1280+
int32_t DstDeviceId, void *DstPtr,
1281+
int64_t Size);
12751282

12761283
/// Exchange memory addresses between two devices asynchronously.
1277-
int32_t data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
1278-
int DstDeviceId, void *DstPtr, int64_t Size,
1279-
__tgt_async_info *AsyncInfo);
1284+
virtual int32_t data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
1285+
int DstDeviceId, void *DstPtr,
1286+
int64_t Size,
1287+
__tgt_async_info *AsyncInfo);
12801288

12811289
/// Begin executing a kernel on the given device.
1282-
int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
1283-
ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs,
1284-
__tgt_async_info *AsyncInfoPtr);
1290+
virtual int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
1291+
void **TgtArgs, ptrdiff_t *TgtOffsets,
1292+
KernelArgsTy *KernelArgs,
1293+
__tgt_async_info *AsyncInfoPtr);
12851294

12861295
/// Synchronize an asynchronous queue with the plugin runtime.
1287-
int32_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
1296+
virtual int32_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
12881297

12891298
/// Query the current state of an asynchronous queue.
1290-
int32_t query_async(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
1299+
virtual int32_t query_async(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
12911300

12921301
/// Prints information about the given devices supported by the plugin.
1293-
void print_device_info(int32_t DeviceId);
1302+
virtual void print_device_info(int32_t DeviceId);
12941303

12951304
/// Creates an event in the given plugin if supported.
1296-
int32_t create_event(int32_t DeviceId, void **EventPtr);
1305+
virtual int32_t create_event(int32_t DeviceId, void **EventPtr);
12971306

12981307
/// Records an event that has occurred.
1299-
int32_t record_event(int32_t DeviceId, void *EventPtr,
1300-
__tgt_async_info *AsyncInfoPtr);
1308+
virtual int32_t record_event(int32_t DeviceId, void *EventPtr,
1309+
__tgt_async_info *AsyncInfoPtr);
13011310

13021311
/// Wait until an event has occurred.
1303-
int32_t wait_event(int32_t DeviceId, void *EventPtr,
1304-
__tgt_async_info *AsyncInfoPtr);
1312+
virtual int32_t wait_event(int32_t DeviceId, void *EventPtr,
1313+
__tgt_async_info *AsyncInfoPtr);
13051314

13061315
/// Synchronize execution until an event is done.
1307-
int32_t sync_event(int32_t DeviceId, void *EventPtr);
1316+
virtual int32_t sync_event(int32_t DeviceId, void *EventPtr);
13081317

13091318
/// Remove the event from the plugin.
1310-
int32_t destroy_event(int32_t DeviceId, void *EventPtr);
1319+
virtual int32_t destroy_event(int32_t DeviceId, void *EventPtr);
13111320

13121321
/// Sets the info level of the plugin to \p NewInfoLevel.
13131322
void set_info_flag(uint32_t NewInfoLevel);
13141323

13151324
/// Creates an asynchronous queue for the given plugin.
1316-
int32_t init_async_info(int32_t DeviceId, __tgt_async_info **AsyncInfoPtr);
1325+
virtual int32_t init_async_info(int32_t DeviceId,
1326+
__tgt_async_info **AsyncInfoPtr);
13171327

13181328
/// Creates device information to be used for diagnostics.
1319-
int32_t init_device_info(int32_t DeviceId, __tgt_device_info *DeviceInfo,
1320-
const char **ErrStr);
1329+
virtual int32_t init_device_info(int32_t DeviceId,
1330+
__tgt_device_info *DeviceInfo,
1331+
const char **ErrStr);
13211332

13221333
/// Sets the offset into the devices for use by OMPT.
13231334
int32_t set_device_identifier(int32_t UserId, int32_t DeviceId);
13241335

13251336
/// Returns if the plugin can support automatic copy.
1326-
int32_t use_auto_zero_copy(int32_t DeviceId);
1337+
virtual int32_t use_auto_zero_copy(int32_t DeviceId);
13271338

13281339
/// Look up a global symbol in the given binary.
1329-
int32_t get_global(__tgt_device_binary Binary, uint64_t Size,
1330-
const char *Name, void **DevicePtr);
1340+
virtual int32_t get_global(__tgt_device_binary Binary, uint64_t Size,
1341+
const char *Name, void **DevicePtr);
13311342

13321343
/// Look up a kernel function in the given binary.
1333-
int32_t get_function(__tgt_device_binary Binary, const char *Name,
1334-
void **KernelPtr);
1344+
virtual int32_t get_function(__tgt_device_binary Binary, const char *Name,
1345+
void **KernelPtr);
13351346

13361347
private:
13371348
/// Indicates if the platform runtime has been fully initialized.

offload/plugins-nextgen/host/src/rtl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
#endif
4444

4545
// The number of devices in this plugin.
46-
#define NUM_DEVICES 4
46+
#define NUM_DEVICES 1
4747

4848
namespace llvm {
4949
namespace omp {
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Looking for MPI...
2+
find_package(MPI QUIET)
3+
4+
# Check for C++20 support
5+
include(CheckCXXCompilerFlag)
6+
check_cxx_compiler_flag("-std=c++20" CXX_SUPPORTS_CXX20_FLAG)
7+
8+
if(NOT(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
9+
message(STATUS "Not building MPI offloading plugin: only support MPI in Linux x86_64 or ppc64le hosts.")
10+
return()
11+
elseif(NOT MPI_CXX_FOUND)
12+
message(STATUS "Not building MPI offloading plugin: MPI not found in system.")
13+
return()
14+
elseif (NOT CXX_SUPPORTS_CXX20_FLAG)
15+
message(STATUS "Not building MPI offloading plugin: compiler does not support CXX20.")
16+
return()
17+
endif()
18+
19+
message(STATUS "Building MPI Proxy offloading plugin.")
20+
21+
# Event System
22+
add_subdirectory(event_system)
23+
24+
# MPI Plugin
25+
26+
# Create the library and add the default arguments.
27+
add_target_library(omptarget.rtl.mpi MPI)
28+
29+
target_sources(omptarget.rtl.mpi PRIVATE
30+
src/rtl.cpp
31+
)
32+
33+
target_link_libraries(omptarget.rtl.mpi PRIVATE
34+
EventSystem
35+
)
36+
37+
# Add include directories
38+
target_include_directories(omptarget.rtl.mpi PRIVATE
39+
${LIBOMPTARGET_INCLUDE_DIR})
40+
41+
# Set C++20 as the target standard for this plugin.
42+
set_target_properties(omptarget.rtl.mpi
43+
PROPERTIES
44+
CXX_STANDARD 20
45+
CXX_STANDARD_REQUIRED ON)
46+
47+
48+
# Configure testing for the MPI plugin.
49+
list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.mpi")
50+
# Report to the parent scope that we are building a plugin for MPI.
51+
set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
52+
53+
# Define the target specific triples and ELF machine values.
54+
# Spell the host triple exactly like the "x86_64-unknown-linux-gnu-mpi" entry
# registered in LIBOMPTARGET_ALL_TARGETS so the two lists can be matched.
set(LIBOMPTARGET_SYSTEM_TARGETS
55+
"${LIBOMPTARGET_SYSTEM_TARGETS} x86_64-unknown-linux-gnu-mpi nvptx64-nvidia-cuda-mpi" PARENT_SCOPE)
56+
57+
# Remote Plugin Manager
58+
message(STATUS "Building the llvm-offload-mpi-proxy-device")
59+
60+
set(LIBOMPTARGET_ALL_REMOTE_PLUGIN_TARGETS amdgpu cuda host)
61+
# Cache option selecting which plugins the MPI proxy device offloads to; the
# help text is worded so it is distinguishable from LIBOMPTARGET_PLUGINS_TO_BUILD.
set(LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "all" CACHE STRING
62+
"Semicolon-separated list of remote plugins for the MPI proxy device: cuda, amdgpu, host or \"all\".")
63+
64+
if(LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD STREQUAL "all")
65+
set(LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD ${LIBOMPTARGET_ALL_REMOTE_PLUGIN_TARGETS})
66+
endif()
67+
68+
if(NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND
69+
"host" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
70+
message(STATUS "Not building remote host plugin: only Linux systems are supported")
71+
list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "host")
72+
endif()
73+
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$"
74+
AND CMAKE_SYSTEM_NAME MATCHES "Linux"))
75+
if("amdgpu" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
76+
message(STATUS "Not building remote AMDGPU plugin: only support AMDGPU in "
77+
"Linux x86_64, ppc64le, or aarch64 hosts")
78+
list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "amdgpu")
79+
endif()
80+
if("cuda" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
81+
message(STATUS "Not building remote CUDA plugin: only support CUDA in "
82+
"Linux x86_64, ppc64le, or aarch64 hosts")
83+
list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "cuda")
84+
endif()
85+
endif()
86+
if("mpi" IN_LIST LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
87+
message(STATUS "It is not possible to build the mpi plugin inside "
88+
"the remote proxy device")
89+
list(REMOVE_ITEM LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD "mpi")
90+
endif()
91+
92+
message(STATUS "Building the MPI Plugin with support for remote offloading to "
93+
"the \"${LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD}\" plugins")
94+
95+
set(REMOTE_MPI_ENUM_PLUGIN_TARGETS "")
96+
foreach(plugin IN LISTS LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
97+
set(REMOTE_MPI_ENUM_PLUGIN_TARGETS
98+
"${REMOTE_MPI_ENUM_PLUGIN_TARGETS}PLUGIN_TARGET(${plugin})\n")
99+
endforeach()
100+
# Quote the expansion: when no remote plugins remain the value is empty, and an
# unquoted empty expansion would leave STRIP without its input argument.
string(STRIP "${REMOTE_MPI_ENUM_PLUGIN_TARGETS}" REMOTE_MPI_ENUM_PLUGIN_TARGETS)
101+
configure_file(
102+
${CMAKE_CURRENT_SOURCE_DIR}/src/RemoteTargets.def.in
103+
${LIBOMPTARGET_BINARY_INCLUDE_DIR}/Shared/RemoteTargets.def
104+
)
105+
106+
llvm_add_tool(OPENMP llvm-offload-mpi-proxy-device
107+
src/ProxyDevice.cpp
108+
src/RemotePluginManager.cpp
109+
${LIBOMPTARGET_SRC_DIR}/OpenMP/OMPT/Callback.cpp
110+
)
111+
112+
llvm_update_compile_flags(llvm-offload-mpi-proxy-device)
113+
114+
target_link_libraries(llvm-offload-mpi-proxy-device PRIVATE
115+
EventSystem
116+
LLVMSupport
117+
omp
118+
)
119+
120+
add_dependencies(llvm-offload-mpi-proxy-device omp)
121+
122+
target_include_directories(llvm-offload-mpi-proxy-device PRIVATE
123+
${LIBOMPTARGET_INCLUDE_DIR}
124+
${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
125+
${LIBOMPTARGET_BINARY_INCLUDE_DIR}
126+
)
127+
128+
foreach(plugin IN LISTS LIBOMPTARGET_REMOTE_PLUGINS_TO_BUILD)
129+
target_link_libraries(llvm-offload-mpi-proxy-device PRIVATE omptarget.rtl.${plugin})
130+
add_dependencies(llvm-offload-mpi-proxy-device omptarget.rtl.${plugin})
131+
endforeach()
132+
133+
# Set C++20 as the target standard for this plugin.
134+
set_target_properties(llvm-offload-mpi-proxy-device
135+
PROPERTIES
136+
CXX_STANDARD 20
137+
CXX_STANDARD_REQUIRED ON)
138+
139+
target_compile_definitions(llvm-offload-mpi-proxy-device PRIVATE
140+
TARGET_NAME=llvm-offload-mpi-proxy-device
141+
DEBUG_PREFIX="MPIProxyDevice")

0 commit comments

Comments
 (0)