From a23790deefce67100af48adc78731dfeb45b8510 Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 14 Jul 2020 17:49:37 +0200 Subject: [PATCH 1/9] Add code for target library offloading from sx-aurora to host. There is a host(sx-aurora) and a device(x86 host) part of this rtl. --- .../plugins/vh/src/device-rtl.cpp | 31 ++ openmp/libomptarget/plugins/vh/src/rtl.cpp | 421 ++++++++++++++++++ 2 files changed, 452 insertions(+) create mode 100644 openmp/libomptarget/plugins/vh/src/device-rtl.cpp create mode 100644 openmp/libomptarget/plugins/vh/src/rtl.cpp diff --git a/openmp/libomptarget/plugins/vh/src/device-rtl.cpp b/openmp/libomptarget/plugins/vh/src/device-rtl.cpp new file mode 100644 index 000000000000..7df10a0f74ff --- /dev/null +++ b/openmp/libomptarget/plugins/vh/src/device-rtl.cpp @@ -0,0 +1,31 @@ +//===-RTLs/generic-64bit/src/rtl.cpp - Target RTLs Implementation - C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// RTL for vector host, a generic 64-bit machine +// +//===----------------------------------------------------------------------===// + +#include +#include + +extern "C" uint64_t alloc_vh(uint64_t Size) { + return reinterpret_cast(malloc(Size)); +} + +extern "C" uint64_t submit_vh(veos_handle *handle, uint64_t src, uint64_t size, uint64_t* dst) { + return ve_recv_data(handle, src, size, dst); +} + +extern "C" uint64_t retrieve_vh(veos_handle *handle, uint64_t dst, uint64_t size, uint64_t* src) { + return ve_send_data(handle, dst, size, src); +} + +extern "C" uint64_t delete_vh(uint64_t TargetPtr) { + free((void*)TargetPtr); + return 0; // this is discarded +} diff --git a/openmp/libomptarget/plugins/vh/src/rtl.cpp b/openmp/libomptarget/plugins/vh/src/rtl.cpp new file mode 100644 index 000000000000..c9fff8eaba13 --- /dev/null +++ b/openmp/libomptarget/plugins/vh/src/rtl.cpp @@ -0,0 +1,421 @@ +//===-RTLs/nec-aurora/src/rtl.cpp - Target RTLs Implementation - C++ -*-======// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// +// +// RTL for NEC Aurora TSUBASA machines +// +//===----------------------------------------------------------------------===// + +#include "omptargetplugin.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef TARGET_ELF_ID +#define TARGET_ELF_ID 0 +#endif + +#ifdef OMPTARGET_DEBUG +static int DebugLevel = 0; + +#define GETNAME2(name) #name +#define GETNAME(name) GETNAME2(name) +#define DP(...) 
\ + do { \ + if (DebugLevel > 0) { \ + DEBUGP("Target " GETNAME(TARGET_NAME) " RTL", __VA_ARGS__); \ + } \ + } while (false) +#else // OMPTARGET_DEBUG +#define DP(...) \ + {} +#endif // OMPTARGET_DEBUG + + +struct DynLibTy { + char *FileName; + vhcall_handle Handle; +}; + +/// Keep entries table per device. +struct FuncOrGblEntryTy { + __tgt_target_table Table; + std::vector<__tgt_offload_entry> Entries; +}; + +class RTLDeviceInfoTy { + std::list FuncOrGblEntry; + +public: + uint64_t LibraryHandle; + std::list DynLibs; + + void buildOffloadTableFromHost(int32_t device_id, uint64_t VHCallLibHandle, + __tgt_offload_entry *HostBegin, + __tgt_offload_entry *HostEnd) { + FuncOrGblEntry.emplace_back(); + std::vector<__tgt_offload_entry> &T = FuncOrGblEntry.back().Entries; + T.clear(); + + for (__tgt_offload_entry *i = HostBegin; i != HostEnd; ++i) { + char *SymbolName = i->name; + // we need the symbol id so we have to ask + DP("Looking up symbol: %s\n", SymbolName); + int64_t SymbolTargetAddr = vhcall_find(VHCallLibHandle, SymbolName); + __tgt_offload_entry Entry; + + if (!SymbolTargetAddr) { + DP("Symbol %s not found in target image\n", SymbolName); + Entry = {NULL, NULL, 0, 0, 0}; + } else { + DP("Found symbol %s successfully in target image (addr: %p)\n", + SymbolName, reinterpret_cast(SymbolTargetAddr)); + Entry = { reinterpret_cast(SymbolTargetAddr), + i->name, + i->size, + i->flags, + 0 }; + } + + T.push_back(Entry); + } + + FuncOrGblEntry.back().Table.EntriesBegin = &T.front(); + FuncOrGblEntry.back().Table.EntriesEnd = &T.back() + 1; + } + + __tgt_target_table *getOffloadTable(int32_t device_id) { + return &FuncOrGblEntry.back().Table; + } + + RTLDeviceInfoTy() { +#ifdef OMPTARGET_DEBUG + if (char *envStr = getenv("LIBOMPTARGET_DEBUG")) { + DebugLevel = std::stoi(envStr); + } +#endif // OMPTARGET_DEBUG + } + + ~RTLDeviceInfoTy() { + // TODO unload libraries with vhcall_uninstall + for (auto &lib : DynLibs) { + vhcall_uninstall(lib.Handle); + if (lib.FileName) { 
+ remove(lib.FileName); + } + } + } +}; + +static RTLDeviceInfoTy DeviceInfo; +/* +static int target_run_function_wait(uint32_t DeviceID, uint64_t FuncAddr, + struct veo_args *args, uint64_t *RetVal) { + DP("Running function with entry point %p\n", + reinterpret_cast(FuncAddr)); + uint64_t RequestHandle = + veo_call_async(DeviceInfo.Contexts[DeviceID], FuncAddr, args); + if (RequestHandle == VEO_REQUEST_ID_INVALID) { + DP("Execution of entry point %p failed\n", + reinterpret_cast(FuncAddr)); + return OFFLOAD_FAIL; + } + + DP("Function at address %p called (VEO request ID: %" PRIu64 ")\n", + reinterpret_cast(FuncAddr), RequestHandle); + + int ret = veo_call_wait_result(DeviceInfo.Contexts[DeviceID], RequestHandle, + RetVal); + if (ret != 0) { + DP("Waiting for entry point %p failed (Error code %d)\n", + reinterpret_cast(FuncAddr), ret); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; +} + +*/ +// Return the number of available devices of the type supported by the +// target RTL. +// In this case there is the assumption that there is one device. +// TODO: maybe respond with the number of NUMA nodes of the host +int32_t __tgt_rtl_number_of_devices(void) { return 1; } + +// Return an integer different from zero if the provided device image can be +// supported by the runtime. The functionality is similar to comparing the +// result of __tgt__rtl__load__binary to NULL. However, this is meant to be a +// lightweight query to determine if the RTL is suitable for an image without +// having to load the library, which can be expensive. +int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { +#if TARGET_ELF_ID < 1 + return 0; +#else + // This check would need `#include "../../common/elf_common.c"` which in turn + // would pull libelf as a dependency. + // It's not that much of an issue since aurora binaries can only use this + // target plugin. 
+ // return elf_check_machine(Image, TARGET_ELF_ID); + return 1; +#endif +} + +int64_t __tgt_rtl_data_alloc_vh_id; +int64_t __tgt_rtl_data_submit_vh_id; +int64_t __tgt_rtl_data_retrieve_vh_id; +int64_t __tgt_rtl_data_delete_vh_id; +// Initialize the specified device. In case of success return 0; otherwise +// return an error code. +int32_t __tgt_rtl_init_device(int32_t ID) { + DP("Loading support library on vh.\n"); + + const char* libname = "libomptarget.device-rtl.vh.so"; + auto device_rtl_handle = vhcall_install(libname); + if (device_rtl_handle == -1) { + DP("Failed to load support library \"%s\".\n", libname); + DP("Please make sure it is in your library path.\n"); + return OFFLOAD_FAIL; + } + + DynLibTy Lib = {NULL, device_rtl_handle}; + DeviceInfo.DynLibs.push_back(Lib); + + __tgt_rtl_data_alloc_vh_id = vhcall_find(device_rtl_handle, "alloc_vh"); + __tgt_rtl_data_submit_vh_id = vhcall_find(device_rtl_handle, "submit_vh"); + __tgt_rtl_data_retrieve_vh_id = vhcall_find(device_rtl_handle, "retrieve_vh"); + __tgt_rtl_data_delete_vh_id = vhcall_find(device_rtl_handle, "delete_vh"); + + if (__tgt_rtl_data_alloc_vh_id == -1 || __tgt_rtl_data_submit_vh_id == -1 || + __tgt_rtl_data_retrieve_vh_id == -1 || __tgt_rtl_data_delete_vh_id == -1) + { + DP("Failed to find required functions in %s.\n", libname); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; +} + +// Pass an executable image section described by image to the specified +// device and prepare an address table of target entities. In case of error, +// return NULL. Otherwise, return a pointer to the built address table. +// Individual entries in the table may also be NULL, when the corresponding +// offload region is not supported on the target device. 
+__tgt_target_table *__tgt_rtl_load_binary(int32_t ID,
+                                          __tgt_device_image *Image) {
+  DP("Dev %d: load binary from " DPxMOD " image\n", ID,
+     DPxPTR(Image->ImageStart));
+
+  assert(ID >= 0 && "bad dev id");
+
+  size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart;
+  size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin);
+  DP("Expecting to have %zu entries defined.\n", NumEntries);
+
+  // vhcall_install() takes a file name, so the image is first dumped to a
+  // temporary file.
+  //
+  // 1) Create tmp file with the library contents.
+  // 2) Use vhcall_install to load the file; the symbols are looked up with
+  //    vhcall_find when the offload table is built.
+  char tmp_name[] = "/tmp/tmpfile_XXXXXX";
+  int tmp_fd = mkstemp(tmp_name);
+
+  if (tmp_fd == -1) {
+    DP("mkstemp() failed. Could not write target image\n");
+    return NULL;
+  }
+
+  FILE *ftmp = fdopen(tmp_fd, "wb");
+
+  if (!ftmp) {
+    DP("fdopen() for %s failed. Could not write target image\n", tmp_name);
+    remove(tmp_name);
+    return NULL;
+  }
+
+  // A short write or a failed flush would leave a truncated library behind,
+  // so both results are checked before the file is handed to vhcall_install.
+  bool Written = fwrite(Image->ImageStart, ImageSize, 1, ftmp) == 1;
+
+  // at least for the static case we need to change the permissions
+  chmod(tmp_name, 0700);
+
+  if (fclose(ftmp) != 0)
+    Written = false;
+
+  if (!Written) {
+    DP("Could not write target image to %s\n", tmp_name);
+    remove(tmp_name);
+    return NULL;
+  }
+
+  DP("Wrote target image to %s. ImageSize=%zu\n", tmp_name, ImageSize);
+
+  DP("Host successfully initialized as offload target.\n");
+
+  // Failure is reported as -1 (the value __tgt_rtl_init_device tests for),
+  // not as 0.
+  uint64_t LibHandle = vhcall_install(tmp_name);
+
+  if (LibHandle == (uint64_t)-1) {
+    DP("vhcall_install() failed: LibHandle=%" PRIu64
+       " Name=%s. \n", LibHandle, tmp_name);
+    remove(tmp_name);
+    return NULL;
+  }
+
+  DP("Successfully loaded library dynamically\n");
+
+  // DynLibTy keeps only a raw pointer and is read again at shutdown to delete
+  // the file, so it must not point at this function's stack buffer. Nothing
+  // frees the copy; it lives until process exit.
+  char *StoredName = new char[sizeof(tmp_name)];
+  for (size_t i = 0; i < sizeof(tmp_name); ++i)
+    StoredName[i] = tmp_name[i];
+
+  DynLibTy Lib = {StoredName, LibHandle};
+  DeviceInfo.DynLibs.push_back(Lib);
+  DeviceInfo.LibraryHandle = LibHandle;
+
+  DeviceInfo.buildOffloadTableFromHost(ID, LibHandle, Image->EntriesBegin,
+                                       Image->EntriesEnd);
+
+  return DeviceInfo.getOffloadTable(ID);
+}
+
+// Allocate data on the particular target device, of the specified size.
+// HostPtr is an address of the host data the allocated target data
+// will be associated with (HostPtr may be NULL if it is not known at
+// allocation time, like for example it would be for target data that
+// is allocated by omp_target_alloc() API). Return address of the
+// allocated data on the target that will be used by libomptarget.so to
+// initialize the target data mapping structures. These addresses are
+// used to generate a table of target variables to pass to
+// __tgt_rtl_run_region(). The __tgt_rtl_data_alloc() returns NULL in
+// case an error occurred on the target device.
+void *__tgt_rtl_data_alloc(int32_t ID, int64_t Size, void *HostPtr) {
+  DP("Allocate target memory: size=%" PRIu64 "\n", (uint64_t)Size);
+
+  auto args = vhcall_args_alloc();
+  if (args == NULL) {
+    DP("Could not allocate VHCALL args\n");
+    return nullptr;
+  }
+
+  // A zero result means the allocation on the vh side returned NULL.
+  uint64_t ret = 0;
+  bool Failed =
+      vhcall_args_set_u64(args, 0, (uint64_t)Size) != 0 ||
+      vhcall_invoke_with_args(__tgt_rtl_data_alloc_vh_id, args, &ret) != 0 ||
+      ret == 0;
+  vhcall_args_free(args);
+
+  if (Failed) {
+    DP("malloc on vh failed.\n");
+    return nullptr;
+  }
+
+  return reinterpret_cast<void *>(ret);
+}
+
+// Pass the data content to the target device using the target address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_submit(int32_t ID, void *TargetPtr, void *HostPtr,
+                              int64_t Size) {
+  DP("Submitting data to vh.\n");
+  auto args = vhcall_args_alloc();
+  if (args == NULL) {
+    DP("Could not allocate VHCALL args\n");
+    return OFFLOAD_FAIL;
+  }
+
+  // Argument slots follow submit_vh(handle, src, size, dst); any failing
+  // step short-circuits the rest.
+  uint64_t ret = 0;
+  bool Failed =
+      vhcall_args_set_veoshandle(args, 0) != 0 ||
+      vhcall_args_set_u64(args, 1, (uint64_t)HostPtr) != 0 ||
+      vhcall_args_set_u64(args, 2, (uint64_t)Size) != 0 ||
+      vhcall_args_set_u64(args, 3, (uint64_t)TargetPtr) != 0 ||
+      vhcall_invoke_with_args(__tgt_rtl_data_submit_vh_id, args, &ret) != 0 ||
+      ret != 0;
+  vhcall_args_free(args);
+
+  if (Failed) {
+    DP("Data transfer failed.\n");
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
+
+// Retrieve the data content from the target device using its address.
+// In case of success, return zero. Otherwise, return an error code.
+int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr, + int64_t Size) { + DP("Retrieving data from vh.\n"); + auto args = vhcall_args_alloc(); + vhcall_args_set_veoshandle(args, 0); + vhcall_args_set_u64(args, 1, (uint64_t)HostPtr); + vhcall_args_set_u64(args, 2, (uint64_t)Size); + vhcall_args_set_u64(args, 3, (uint64_t)TargetPtr); + + uint64_t ret; + if (vhcall_invoke_with_args(__tgt_rtl_data_retrieve_vh_id, args, &ret) != 0 || + ret != 0) { + DP("Data transfer failed.\n"); + vhcall_args_free(args); + return OFFLOAD_FAIL; + } + + vhcall_args_free(args); + return OFFLOAD_SUCCESS; +} + +// De-allocate the data referenced by target ptr on the device. In case of +// success, return zero. Otherwise, return an error code. +int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr) { + uint64_t ret; + + DP("Release target memory: ptr=%" PRIu64 "\n", (uint64_t)TargetPtr); + + auto args = vhcall_args_alloc(); + vhcall_args_set_u64(args, 0, (uint64_t)TargetPtr); + + if (vhcall_invoke_with_args(__tgt_rtl_data_delete_vh_id, args, &ret) != 0) { + DP("free on vh failed.\n"); + vhcall_args_free(args); + return OFFLOAD_FAIL; + } + + vhcall_args_free(args); + return OFFLOAD_SUCCESS; +} + +// Similar to __tgt_rtl_run_target_region, but additionally specify the +// number of teams to be created and a number of threads in each team. +int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args, + ptrdiff_t *Offsets, int32_t NumArgs, + int32_t NumTeams, int32_t ThreadLimit, + uint64_t loop_tripcount) { + int ret; + DP("Running function with entry point %p\n", Entry); + + // ignore team num and thread limit. 
+ std::vector ptrs(NumArgs); + + auto TargetArgs = vhcall_args_alloc(); + + if (TargetArgs == NULL) { + DP("Could not allocate VHCALL args\n"); + return OFFLOAD_FAIL; + } + + for (int i = 0; i < NumArgs; ++i) { + ret = vhcall_args_set_u64(TargetArgs, i, (intptr_t)Args[i]); + + if (ret != 0) { + DP("vhcall_args_set_u64() has returned %d for argnum=%d and value %p\n", + ret, i, Args[i]); + vhcall_args_free(TargetArgs); + return OFFLOAD_FAIL; + } + } + + uint64_t RetVal; + if (vhcall_invoke_with_args( reinterpret_cast(Entry), TargetArgs, &RetVal) != 0) { + DP("Execution of entry point %p failed\n", Entry); + vhcall_args_free(TargetArgs); + return OFFLOAD_FAIL; + } + vhcall_args_free(TargetArgs); + return OFFLOAD_SUCCESS; +} + +// Transfer control to the offloaded entry Entry on the target device. +// Args and Offsets are arrays of NumArgs size of target addresses and +// offsets. An offset should be added to the target address before passing it +// to the outlined function on device side. In case of success, return zero. +// Otherwise, return an error code. +int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args, + ptrdiff_t *Offsets, int32_t NumArgs) { + return __tgt_rtl_run_target_team_region(ID, Entry, Args, Offsets, NumArgs, 1, + 1, 0); +} From 63f32dc7d4290e3ff779b3a91738fd47548f1d16 Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 14 Jul 2020 18:14:39 +0200 Subject: [PATCH 2/9] Add cmake lookup for required headers and libraries. 
--- .../Modules/LibomptargetGetDependencies.cmake | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake index 4e999036d173..a76ede3a889e 100644 --- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake +++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake @@ -15,6 +15,9 @@ # libffi : required to launch target kernels given function and argument # pointers. # CUDA : required to control offloading to NVIDIA GPUs. +# VEOS : required to control offloading to NEC Aurora. +# VHCALL : required to control offloading from NEC Aurora to the host. +# VEPSEUDO : required to control offloading from NEC Aurora to the host. include (FindPackageHandleStandardArgs) @@ -166,6 +169,161 @@ find_package_handle_standard_args( mark_as_advanced(LIBOMPTARGET_DEP_CUDA_DRIVER_LIBRARIES) +################################################################################ +# Looking for VEO... 
+################################################################################ + +find_path ( + LIBOMPTARGET_DEP_VEO_INCLUDE_DIR + NAMES + ve_offload.h + PATHS + /usr/include + /usr/local/include + /opt/local/include + /sw/include + /opt/nec/ve/veos/include + ENV CPATH + PATH_SUFFIXES + libveo) + +find_library ( + LIBOMPTARGET_DEP_VEO_LIBRARIES + NAMES + veo + PATHS + /usr/lib + /usr/local/lib + /opt/local/lib + /sw/lib + /opt/nec/ve/veos/lib64 + ENV LIBRARY_PATH + ENV LD_LIBRARY_PATH) + +find_library( + LIBOMPTARGET_DEP_VEOSINFO_LIBRARIES + NAMES + veosinfo + PATHS + /usr/lib + /usr/local/lib + /opt/local/lib + /sw/lib + /opt/nec/ve/veos/lib64 + ENV LIBRARY_PATH + ENV LD_LIBRARY_PATH) + +set(LIBOMPTARGET_DEP_VEO_INCLUDE_DIRS ${LIBOMPTARGET_DEP_VEO_INCLUDE_DIR}) +find_package_handle_standard_args( + LIBOMPTARGET_DEP_VEO + DEFAULT_MSG + LIBOMPTARGET_DEP_VEO_LIBRARIES + LIBOMPTARGET_DEP_VEOSINFO_LIBRARIES + LIBOMPTARGET_DEP_VEO_INCLUDE_DIRS) + +mark_as_advanced( + LIBOMPTARGET_DEP_VEO_FOUND + LIBOMPTARGET_DEP_VEO_INCLUDE_DIRS) + +################################################################################ +# Looking for VHCALL +################################################################################ +find_path ( + LIBOMPTARGET_DEP_VHCALL_INCLUDE_DIR + NAMES + libvhcall.h + PATHS + /usr/include + /usr/local/include + /opt/local/include + /sw/include + /opt/nec/ve/include + ENV CPATH + PATH_SUFFIXES + libvhcall) # TODO is this necessary? + +# Needed on vh side +find_path ( + LIBOMPTARGET_DEP_VHCALL_INCLUDE_DIR + NAMES + libvepseudo.h + PATHS + /usr/include + /usr/local/include + /opt/local/include + /sw/include + /opt/nec/ve/include + /opt/nec/ve/veos/include + ENV CPATH + PATH_SUFFIXES + libvhcall) # TODO is this necessary? 
+ +find_library ( + LIBOMPTARGET_DEP_VHCALL_LIBRARIES + NAMES + sysve + PATHS + /usr/lib + /usr/local/lib + /opt/local/lib + /sw/lib + /opt/nec/ve/lib + ENV LIBRARY_PATH + ENV LD_LIBRARY_PATH) + +set(LIBOMPTARGET_DEP_VHCALL_INCLUDE_DIRS ${LIBOMPTARGET_DEP_VHCALL_INCLUDE_DIR}) +find_package_handle_standard_args( + LIBOMPTARGET_DEP_VHCALL + DEFAULT_MSG + LIBOMPTARGET_DEP_VHCALL_LIBRARIES + LIBOMPTARGET_DEP_VHCALL_INCLUDE_DIRS) + +mark_as_advanced( + LIBOMPTARGET_DEP_VHCALL_FOUND + LIBOMPTARGET_DEP_VHCALL_INCLUDE_DIRS) + +################################################################################ +# Looking for VEPSEUDO +################################################################################ + +find_path ( + LIBOMPTARGET_DEP_VEPSEUDO_INCLUDE_DIR + NAMES + libvepseudo.h + PATHS + /usr/include + /usr/local/include + /opt/local/include + /sw/include + /opt/nec/ve/veos/include + ENV CPATH + PATH_SUFFIXES + libvhcall) # TODO is this necessary? + +find_library ( + LIBOMPTARGET_DEP_VEPSEUDO_LIBRARIES + NAMES + vepseudo + PATHS + /usr/lib + /usr/local/lib + /opt/local/lib + /sw/lib + /opt/nec/ve/veos/lib + ENV LIBRARY_PATH + ENV LD_LIBRARY_PATH) + +set(LIBOMPTARGET_DEP_VEPSEUDO_INCLUDE_DIRS ${LIBOMPTARGET_DEP_VEPSEUDO_INCLUDE_DIR}) +find_package_handle_standard_args( + LIBOMPTARGET_DEP_VEPSEUDO + DEFAULT_MSG + LIBOMPTARGET_DEP_VEPSEUDO_LIBRARIES + LIBOMPTARGET_DEP_VEPSEUDO_INCLUDE_DIRS) + +mark_as_advanced( + LIBOMPTARGET_DEP_VEPSEUDO_FOUND + LIBOMPTARGET_DEP_VEPSEUDO_INCLUDE_DIRS) + ################################################################################ # Looking for CUDA libdevice subdirectory # From 228c4fc428d17157c1f3a681a3ad2acd1e7d08e6 Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 14 Jul 2020 18:15:53 +0200 Subject: [PATCH 3/9] Add cmake file for sx-aurora to host offload plugin. 
--- openmp/libomptarget/plugins/vh/CMakeLists.txt | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 openmp/libomptarget/plugins/vh/CMakeLists.txt diff --git a/openmp/libomptarget/plugins/vh/CMakeLists.txt b/openmp/libomptarget/plugins/vh/CMakeLists.txt new file mode 100644 index 000000000000..58513b1f8cc8 --- /dev/null +++ b/openmp/libomptarget/plugins/vh/CMakeLists.txt @@ -0,0 +1,66 @@ +##===----------------------------------------------------------------------===## +# +# Build a plugin for a NEC SX-Aurora machine if available. +# Offloading is done from SX-Aurora device to the host processor. +# +##===----------------------------------------------------------------------===## + + +if(${LIBOMPTARGET_DEP_VHCALL_FOUND} AND ${LIBOMPTARGET_DEP_VEPSEUDO_FOUND}) + set(tmachine_name "vh") + set(tmachine_libname "vh") + set(tmachine_triple "x86_64-pc-linux-gnu") + set(elf_machine_id 62) + + if("${LIBOMP_ARCH}" STREQUAL "ve") + libomptarget_say("Building SX-Aurora VE to VH offloading plugin - ve part.") + set(additional_libs "") + set(additional_libs ${LIBOMPTARGET_DEP_VHCALL_LIBRARIES} + ${additional_libs}) + + + include_directories(${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR}) + include_directories(${LIBOMPTARGET_DEP_VHCALL_INCLUDE_DIR}) + + + # Define macro to be used as prefix of the runtime messages for this target. + add_definitions("-DTARGET_NAME=${tmachine_name}") + + # Define macro with the ELF ID for this target. + add_definitions("-DTARGET_ELF_ID=${elf_machine_id}") + + add_library("omptarget.rtl.${tmachine_libname}" SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/src/rtl.cpp) + + # Install plugin under the lib destination folder. 
+ install(TARGETS "omptarget.rtl.${tmachine_libname}" + LIBRARY DESTINATION lib${OPENMP_LIBDIR_SUFFIX}) + + target_link_libraries( + "omptarget.rtl.${tmachine_libname}" + ${additional_libs} + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports -Wl,-z,defs") + + # Report to the parent scope that we are building a plugin. + set(LIBOMPTARGET_SYSTEM_TARGETS + "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple}" PARENT_SCOPE) + else() + libomptarget_say("Building SX-Aurora VE to VH offloading plugin - vh part.") + + include_directories(${LIBOMPTARGET_DEP_VEPSEUDO_INCLUDE_DIR}) + + add_library( + "omptarget.device-rtl.${tmachine_libname}" SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/src/device-rtl.cpp) + + # should end up in the default library path on the vector host + install(TARGETS "omptarget.device-rtl.${tmachine_libname}" + LIBRARY DESTINATION lib${OPENMP_LIBDIR_SUFFIX}) + + target_link_libraries( + "omptarget.device-rtl.${tmachine_libname}" + ${LIBOMPTARGET_DEP_VEPSEUDO_LIBRARIES}) + endif() +else() + libomptarget_say("Not building vh plugin: libvhcall not found.") +endif() From 2593ed707ff3452a0fad15d172d1d5273c7ee07c Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 14 Jul 2020 18:18:31 +0200 Subject: [PATCH 4/9] Enable the new plugin in the global cmake system. --- openmp/CMakeLists.txt | 2 +- openmp/libomptarget/deviceRTLs/CMakeLists.txt | 4 +++- openmp/libomptarget/plugins/CMakeLists.txt | 14 +++++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index ae197fb7f97a..02bc813a8c98 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -63,7 +63,7 @@ set(ENABLE_LIBOMPTARGET ON) # Since the device plugins are only supported on Linux anyway, # there is no point in trying to compile libomptarget on other OSes. # Disable libomptarget on VE also since it doesn't work at the moment. 
-if (APPLE OR WIN32 OR ("${LIBOMP_ARCH}" STREQUAL "ve") OR NOT OPENMP_HAVE_STD_CPP14_FLAG) +if (APPLE OR WIN32 OR NOT OPENMP_HAVE_STD_CPP14_FLAG) set(ENABLE_LIBOMPTARGET OFF) endif() diff --git a/openmp/libomptarget/deviceRTLs/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/CMakeLists.txt index 3df94eac0727..21bf271f120b 100644 --- a/openmp/libomptarget/deviceRTLs/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/CMakeLists.txt @@ -10,4 +10,6 @@ # ##===----------------------------------------------------------------------===## -add_subdirectory(nvptx) +if(NOT "${LIBOMP_ARCH}" STREQUAL "ve") + add_subdirectory(nvptx) +endif() diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt index bb3f9c908087..e94179e6c02f 100644 --- a/openmp/libomptarget/plugins/CMakeLists.txt +++ b/openmp/libomptarget/plugins/CMakeLists.txt @@ -65,11 +65,15 @@ else() endif() endmacro() -add_subdirectory(aarch64) -add_subdirectory(cuda) -add_subdirectory(ppc64) -add_subdirectory(ppc64le) -add_subdirectory(x86_64) +if(NOT "${LIBOMP_ARCH}" STREQUAL "ve") + add_subdirectory(aarch64) + add_subdirectory(cuda) + add_subdirectory(ppc64) + add_subdirectory(ppc64le) + add_subdirectory(ve) + add_subdirectory(x86_64) +endif() +add_subdirectory(vh) # Make sure the parent scope can see the plugins that will be created. set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE) From 67ac1344356719868049cc3d034027e065fe519f Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 14 Jul 2020 18:20:21 +0200 Subject: [PATCH 5/9] Enable the new plugin during runtime. --- openmp/libomptarget/src/rtl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp index 1439f67e7c64..eea08415e3b2 100644 --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -23,6 +23,8 @@ // List of all plugins that can support offloading. 
static const char *RTLNames[] = { + /* SX-Aurora VH target */ "libomptarget.rtl.vh.so", + /* SX-Aurora VE target */ "libomptarget.rtl.ve.so", /* PowerPC target */ "libomptarget.rtl.ppc64.so", /* x86_64 target */ "libomptarget.rtl.x86_64.so", /* CUDA target */ "libomptarget.rtl.cuda.so", From da9a5ec307cc958a64aa41d06344aa04d874cb9b Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 14 Jul 2020 18:12:43 +0200 Subject: [PATCH 6/9] Fix whitespace at line endings. --- .../Modules/LibomptargetGetDependencies.cmake | 18 +++++------ openmp/libomptarget/plugins/CMakeLists.txt | 30 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake index a76ede3a889e..06c2f7b0f386 100644 --- a/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake +++ b/openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake @@ -12,7 +12,7 @@ # components of libomptarget. These are the dependencies we have: # # libelf : required by some targets to handle the ELF files at runtime. -# libffi : required to launch target kernels given function and argument +# libffi : required to launch target kernels given function and argument # pointers. # CUDA : required to control offloading to NVIDIA GPUs. # VEOS : required to control offloading to NEC Aurora. @@ -49,18 +49,18 @@ find_library ( /sw/lib ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) - + set(LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS ${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR}) find_package_handle_standard_args( - LIBOMPTARGET_DEP_LIBELF + LIBOMPTARGET_DEP_LIBELF DEFAULT_MSG LIBOMPTARGET_DEP_LIBELF_LIBRARIES LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS) mark_as_advanced( - LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS + LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS LIBOMPTARGET_DEP_LIBELF_LIBRARIES) - + ################################################################################ # Looking for libffi... 
################################################################################ @@ -102,15 +102,15 @@ endif() set(LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS ${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR}) find_package_handle_standard_args( - LIBOMPTARGET_DEP_LIBFFI + LIBOMPTARGET_DEP_LIBFFI DEFAULT_MSG LIBOMPTARGET_DEP_LIBFFI_LIBRARIES LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS) mark_as_advanced( - LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS + LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIRS LIBOMPTARGET_DEP_LIBFFI_LIBRARIES) - + ################################################################################ # Looking for CUDA... ################################################################################ @@ -128,7 +128,7 @@ endif(NOT "${LIBOMP_ARCH}" STREQUAL "ve") set(LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) mark_as_advanced( - LIBOMPTARGET_DEP_CUDA_FOUND + LIBOMPTARGET_DEP_CUDA_FOUND LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS) ################################################################################ diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt index e94179e6c02f..31622f511ca9 100644 --- a/openmp/libomptarget/plugins/CMakeLists.txt +++ b/openmp/libomptarget/plugins/CMakeLists.txt @@ -1,9 +1,9 @@ ##===----------------------------------------------------------------------===## -# +# # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# +# ##===----------------------------------------------------------------------===## # # Build plugins for the user system if available. 
@@ -19,41 +19,41 @@ macro(build_generic_elf64 tmachine tmachine_name tmachine_libname tmachine_tripl if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$") if(LIBOMPTARGET_DEP_LIBELF_FOUND) if(LIBOMPTARGET_DEP_LIBFFI_FOUND) - + libomptarget_say("Building ${tmachine_name} offloading plugin.") - + include_directories(${LIBOMPTARGET_DEP_LIBFFI_INCLUDE_DIR}) include_directories(${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR}) - + # Define macro to be used as prefix of the runtime messages for this target. add_definitions("-DTARGET_NAME=${tmachine_name}") - + # Define macro with the ELF ID for this target. add_definitions("-DTARGET_ELF_ID=${elf_machine_id}") - - add_library("omptarget.rtl.${tmachine_libname}" SHARED + + add_library("omptarget.rtl.${tmachine_libname}" SHARED ${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp) - + # Install plugin under the lib destination folder. - install(TARGETS "omptarget.rtl.${tmachine_libname}" + install(TARGETS "omptarget.rtl.${tmachine_libname}" LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}") - + target_link_libraries( "omptarget.rtl.${tmachine_libname}" - ${LIBOMPTARGET_DEP_LIBFFI_LIBRARIES} + ${LIBOMPTARGET_DEP_LIBFFI_LIBRARIES} ${LIBOMPTARGET_DEP_LIBELF_LIBRARIES} dl "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports") - + list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.${tmachine_libname}") # Report to the parent scope that we are building a plugin. - set(LIBOMPTARGET_SYSTEM_TARGETS + set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple}" PARENT_SCOPE) set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE) - + else(LIBOMPTARGET_DEP_LIBFFI_FOUND) libomptarget_say("Not building ${tmachine_name} offloading plugin: libffi dependency not found.") endif(LIBOMPTARGET_DEP_LIBFFI_FOUND) From c82455337e8cc092ffb21e7a47372a342e06720a Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 14 Jul 2020 18:21:42 +0200 Subject: [PATCH 7/9] Minor improvements in build scripts. 
--- openmp/libomptarget/plugins/ve/CMakeLists.txt | 49 +++++++++++++++++++ openmp/runtime/CMakeLists.txt | 2 +- 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 openmp/libomptarget/plugins/ve/CMakeLists.txt diff --git a/openmp/libomptarget/plugins/ve/CMakeLists.txt b/openmp/libomptarget/plugins/ve/CMakeLists.txt new file mode 100644 index 000000000000..3b4364d47254 --- /dev/null +++ b/openmp/libomptarget/plugins/ve/CMakeLists.txt @@ -0,0 +1,49 @@ +##===----------------------------------------------------------------------===## +# +# Build a plugin for a NEC Aurora machine if available. (Can also run on host) +# +##===----------------------------------------------------------------------===## + + +if(${LIBOMPTARGET_DEP_VEO_FOUND}) + libomptarget_say("Building SX-Aurora VE offloading plugin.") + set(additional_libs "") + set(additional_libs ${LIBOMPTARGET_DEP_VEO_LIBRARIES} + ${LIBOMPTARGET_DEP_VEOSINFO_LIBRARIES} + ${additional_libs}) + + set(tmachine_name "ve") + set(tmachine_libname "ve") + set(tmachine_triple "ve-unknown-linux-unknown") + set(elf_machine_id 251) + + include_directories(${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIR}) + include_directories(${LIBOMPTARGET_DEP_VEO_INCLUDE_DIR}) + + + # Define macro to be used as prefix of the runtime messages for this target. + add_definitions("-DTARGET_NAME=${tmachine_name}") + + # Define macro with the ELF ID for this target. + add_definitions("-DTARGET_ELF_ID=${elf_machine_id}") + + add_library("omptarget.rtl.${tmachine_libname}" SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/src/rtl.cpp) + + # Install plugin under the lib destination folder. 
+ install(TARGETS "omptarget.rtl.${tmachine_libname}" + LIBRARY DESTINATION lib${OPENMP_LIBDIR_SUFFIX}) + + target_link_libraries( + "omptarget.rtl.${tmachine_libname}" + ${LIBOMPTARGET_DEP_LIBFFI_LIBRARIES} + ${LIBOMPTARGET_DEP_LIBELF_LIBRARIES} + ${additional_libs} + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports -Wl,-z,defs") + + # Report to the parent scope that we are building a plugin. + set(LIBOMPTARGET_SYSTEM_TARGETS + "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple}" PARENT_SCOPE) +else() + libomptarget_say("Not building ve plugin: libveo or libveosinfo not found.") +endif() diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 696e8c38fbcd..cae201a3011b 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -166,7 +166,7 @@ elseif("${LIBOMP_ARCH}" STREQUAL "mips") # MIPS architecture set(MIPS TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "mips64") # MIPS64 architecture set(MIPS64 TRUE) - elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture +elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture set(RISCV64 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "ve") # VE architecture set(VE TRUE) From 356458d5bbbb99300130377a099e1604335eeda4 Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Fri, 17 Jul 2020 14:01:58 +0200 Subject: [PATCH 8/9] Add comments, improve coding style. 
--- openmp/libomptarget/deviceRTLs/CMakeLists.txt | 1 + openmp/libomptarget/plugins/CMakeLists.txt | 1 + openmp/libomptarget/plugins/vh/CMakeLists.txt | 5 +++- .../plugins/vh/src/device-rtl.cpp | 12 ++++++-- openmp/libomptarget/plugins/vh/src/rtl.cpp | 29 ++----------------- 5 files changed, 18 insertions(+), 30 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/CMakeLists.txt index 21bf271f120b..762c95824165 100644 --- a/openmp/libomptarget/deviceRTLs/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/CMakeLists.txt @@ -10,6 +10,7 @@ # ##===----------------------------------------------------------------------===## +# Offloading from an NEC SX-Aurora device to an NVIDIA device makes no sense if(NOT "${LIBOMP_ARCH}" STREQUAL "ve") add_subdirectory(nvptx) endif() diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt index 31622f511ca9..801b3d50a4f6 100644 --- a/openmp/libomptarget/plugins/CMakeLists.txt +++ b/openmp/libomptarget/plugins/CMakeLists.txt @@ -65,6 +65,7 @@ else() endif() endmacro() +# Offloading to these targets from an NEC SX-Aurora device makes no sense and is thus disabled. 
if(NOT "${LIBOMP_ARCH}" STREQUAL "ve") add_subdirectory(aarch64) add_subdirectory(cuda) diff --git a/openmp/libomptarget/plugins/vh/CMakeLists.txt b/openmp/libomptarget/plugins/vh/CMakeLists.txt index 58513b1f8cc8..6f782c6638ad 100644 --- a/openmp/libomptarget/plugins/vh/CMakeLists.txt +++ b/openmp/libomptarget/plugins/vh/CMakeLists.txt @@ -12,6 +12,7 @@ if(${LIBOMPTARGET_DEP_VHCALL_FOUND} AND ${LIBOMPTARGET_DEP_VEPSEUDO_FOUND}) set(tmachine_triple "x86_64-pc-linux-gnu") set(elf_machine_id 62) + # Build the target plugin on ve side if("${LIBOMP_ARCH}" STREQUAL "ve") libomptarget_say("Building SX-Aurora VE to VH offloading plugin - ve part.") set(additional_libs "") @@ -44,7 +45,9 @@ if(${LIBOMPTARGET_DEP_VHCALL_FOUND} AND ${LIBOMPTARGET_DEP_VEPSEUDO_FOUND}) # Report to the parent scope that we are building a plugin. set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple}" PARENT_SCOPE) - else() + else() # Build the support library on vector host side + # TODO replace this with direct loading of the required libraries and call + # them directly with vhcall. 
libomptarget_say("Building SX-Aurora VE to VH offloading plugin - vh part.") include_directories(${LIBOMPTARGET_DEP_VEPSEUDO_INCLUDE_DIR}) diff --git a/openmp/libomptarget/plugins/vh/src/device-rtl.cpp b/openmp/libomptarget/plugins/vh/src/device-rtl.cpp index 7df10a0f74ff..e8c22741694a 100644 --- a/openmp/libomptarget/plugins/vh/src/device-rtl.cpp +++ b/openmp/libomptarget/plugins/vh/src/device-rtl.cpp @@ -13,18 +13,26 @@ #include #include +// This library is a thin wrapper for functions used by the ve part + +// Allocate memory on the device which is the vector host in this case extern "C" uint64_t alloc_vh(uint64_t Size) { return reinterpret_cast(malloc(Size)); } -extern "C" uint64_t submit_vh(veos_handle *handle, uint64_t src, uint64_t size, uint64_t* dst) { +// Submit data to vh, receive it from ve +extern "C" uint64_t submit_vh(veos_handle *handle, uint64_t src, + uint64_t size, uint64_t* dst) { return ve_recv_data(handle, src, size, dst); } -extern "C" uint64_t retrieve_vh(veos_handle *handle, uint64_t dst, uint64_t size, uint64_t* src) { +// Retrieve data from vh, send it to ve +extern "C" uint64_t retrieve_vh(veos_handle *handle, uint64_t dst, + uint64_t size, uint64_t* src) { return ve_send_data(handle, dst, size, src); } +// Delete memory on the device which is the vector host in this case extern "C" uint64_t delete_vh(uint64_t TargetPtr) { free((void*)TargetPtr); return 0; // this is discarded diff --git a/openmp/libomptarget/plugins/vh/src/rtl.cpp b/openmp/libomptarget/plugins/vh/src/rtl.cpp index c9fff8eaba13..35c62582f90f 100644 --- a/openmp/libomptarget/plugins/vh/src/rtl.cpp +++ b/openmp/libomptarget/plugins/vh/src/rtl.cpp @@ -109,7 +109,6 @@ class RTLDeviceInfoTy { } ~RTLDeviceInfoTy() { - // TODO unload libraries with vhcall_uninstall for (auto &lib : DynLibs) { vhcall_uninstall(lib.Handle); if (lib.FileName) { @@ -120,33 +119,8 @@ class RTLDeviceInfoTy { }; static RTLDeviceInfoTy DeviceInfo; -/* -static int 
target_run_function_wait(uint32_t DeviceID, uint64_t FuncAddr, - struct veo_args *args, uint64_t *RetVal) { - DP("Running function with entry point %p\n", - reinterpret_cast(FuncAddr)); - uint64_t RequestHandle = - veo_call_async(DeviceInfo.Contexts[DeviceID], FuncAddr, args); - if (RequestHandle == VEO_REQUEST_ID_INVALID) { - DP("Execution of entry point %p failed\n", - reinterpret_cast(FuncAddr)); - return OFFLOAD_FAIL; - } - DP("Function at address %p called (VEO request ID: %" PRIu64 ")\n", - reinterpret_cast(FuncAddr), RequestHandle); - - int ret = veo_call_wait_result(DeviceInfo.Contexts[DeviceID], RequestHandle, - RetVal); - if (ret != 0) { - DP("Waiting for entry point %p failed (Error code %d)\n", - reinterpret_cast(FuncAddr), ret); - return OFFLOAD_FAIL; - } - return OFFLOAD_SUCCESS; -} -*/ // Return the number of available devices of the type supported by the // target RTL. // In this case there is the assumption that there is one device. @@ -400,7 +374,8 @@ int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args, } uint64_t RetVal; - if (vhcall_invoke_with_args( reinterpret_cast(Entry), TargetArgs, &RetVal) != 0) { + auto entrypoint = reinterpret_cast(Entry); + if (vhcall_invoke_with_args(entrypoint, TargetArgs, &RetVal) != 0) { DP("Execution of entry point %p failed\n", Entry); vhcall_args_free(TargetArgs); return OFFLOAD_FAIL; From 02b5c6ead240458e0a31c317d42369e908b84fe2 Mon Sep 17 00:00:00 2001 From: Markus Baur Date: Tue, 21 Jul 2020 13:53:17 +0200 Subject: [PATCH 9/9] Add README to vh plugin. 
--- openmp/libomptarget/plugins/vh/README.md | 91 ++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 openmp/libomptarget/plugins/vh/README.md diff --git a/openmp/libomptarget/plugins/vh/README.md b/openmp/libomptarget/plugins/vh/README.md new file mode 100644 index 000000000000..dbba0fe0c148 --- /dev/null +++ b/openmp/libomptarget/plugins/vh/README.md @@ -0,0 +1,91 @@ +# VH target plugin + +A target plugin to support openmp target offloading from an nec sx-aurora device +to the host processor. +Note that this requires binaries built for sx-aurora and _not_ for the `x86_64` +host. +Please note that this plugin currently only works for `C` and not for `C++` +sources. + +## Usage + +In order to use this target plugin you have to get hold of a version of clang, +llvm and libopenmp which was built with support for it. +Building those versions can only be done on a machine with an nec sx-aurora +development environment. +In the following paragraphs it is assumed that you have such a version of clang, +llvm and openmp. + +### Getting started +You have to build for the sx-aurora target (`--target=ve-linux`), enable openmp +(`-fopenmp`) and enable build for `x86_64` offloading targets +(`-fopenmp-targets=x86_64-pc-linux-gnu`). + +The aurora binary has to be able to locate this library and its call stack as +well as dependencies. +Currently, the following linking arguments are needed for the binary: + +TODO: rpath stuff at lib build time? +``` +-Wl,-rpath-link /opt/nec/ve/lib/ +-Wl,-rpath-link ${path_to_llvm_installation}/lib/clang/11.0.0/lib/linux/ve/ +``` + +Furthermore, the `device-rtl` library must be accessible from the standard search +path on the host, e.g. by adding +```LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${path_to_llvm_installation}/lib/"``` +to the call of your executable. + +### `openmp target` features +Currently only synchronous operations are supported, so the `async` clause is +not supported. 
+Other clauses or combinations thereof *should* work. + +## How to build + +First there needs to be a version of clang which can compile the openmp +subproject for the host machine as well as cross-compile for the `ve-linux` +target. +This compiler should then be used to generate an openmp target library for the +host system and a version of the library for the vector engine. +Both libraries must be installed properly and are then ready for use. + +## How to add another target plugin + +This section is intended for programmers who want to add their own target +library because it was really hard for me to find all the code sections which I +had to change during development of this plugin. + +### Copy another plugin +In order to get a skeleton implementation it is recommended to copy another +plugin. +It is recommended to use the one which most closely fits your target and +implementation plan. + +### Add the lookup for required libraries +If the new plugin needs additional libraries and headers, add them to the +`openmp/libomptarget/cmake/Modules/LibomptargetGetDependencies.cmake` +cmake file. +Then adjust the conditional compile options in the `CMakeLists.txt` of the new +plugin. + +### Add it to the global build chain +Add the directory name of your plugin to the list of plugins to scan and build +in `openmp/libomptarget/plugins/CMakeLists.txt`. + +### Code the plugin +Change all the needed code in the copied `CMakeLists.txt` and implement the +functions in `src/rtl.cpp`. +Once all is done and compiles it is time to activate the plugin during runtime. + +### Activate the plugin in `libomptarget` +In order to make `libomptarget` look for the new plugin it must be added to the +list of target plugins in `openmp/libomptarget/src/rtl.cpp`. +Please note that it must be the name given to the library in the new +`CMakeLists.txt` file; `lib` must be prepended and `.so` appended. 
+ +### Test the plugin +It is recommended to test the new plugin with a simple example program while +debugging as well as setting the environment variable `LIBOMPTARGET_DEBUG`. +For final verification of the plugin there is a test suite which can be found +[here](https://crpl.cis.udel.edu/ompvvsollve/).