From 0a16c3256025927b80a56c418761962af5d85f7a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 19 Mar 2025 15:42:47 -0500 Subject: [PATCH] [LLVM] Make the GPU loader utilities an LLVM tool Summary: These tools `amdhsa-loader` and `nvptx-loader` are used to execute unit tests directly on the GPU. We use this for `libc` and `libcxx` unit tests as well as general GPU experimentation. It looks like this. ```console > clang++ main.cpp --target=amdgcn-amd-amdhsa -mcpu=native -flto -lc ./lib/amdgcn-amd-amdhsa/crt1.o > llvm-gpu-loader a.out Hello World! ``` Currently these are a part of the `libc` project, but this creates issues as `libc` itself depends on them to run tests. Right now we get around this by force-including the `libc` project prior to running the runtimes build so that this dependency can be built first. We should instead just make this a simple LLVM tool so it's always available. This has the effect of installing these by default now instead of just when `libc` was enabled, but they should be relatively small. Right now this only supports a 'static' configuration. That is, we locate the CUDA and HSA dependencies at LLVM compile time. In the future we should be able to provide this by default using `dlopen` and some API. I don't know if it's required to reformat all of these names since they used the `libc` naming convention so I just left it for now. 
--- libc/CMakeLists.txt | 7 --- libc/utils/gpu/CMakeLists.txt | 1 - libc/utils/gpu/loader/CMakeLists.txt | 54 ------------------- libc/utils/gpu/loader/amdgpu/CMakeLists.txt | 10 ---- libc/utils/gpu/loader/nvptx/CMakeLists.txt | 9 ---- llvm/CMakeLists.txt | 4 -- llvm/runtimes/CMakeLists.txt | 14 ----- llvm/tools/llvm-gpu-loader/CMakeLists.txt | 45 ++++++++++++++++ .../tools/llvm-gpu-loader/amdhsa.cpp | 8 +-- .../tools/llvm-gpu-loader/llvm-gpu-loader.cpp | 46 +++++++++++++--- .../tools/llvm-gpu-loader/llvm-gpu-loader.h | 13 +++-- .../tools/llvm-gpu-loader/nvptx.cpp | 8 +-- 12 files changed, 102 insertions(+), 117 deletions(-) delete mode 100644 libc/utils/gpu/CMakeLists.txt delete mode 100644 libc/utils/gpu/loader/CMakeLists.txt delete mode 100644 libc/utils/gpu/loader/amdgpu/CMakeLists.txt delete mode 100644 libc/utils/gpu/loader/nvptx/CMakeLists.txt create mode 100644 llvm/tools/llvm-gpu-loader/CMakeLists.txt rename libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp => llvm/tools/llvm-gpu-loader/amdhsa.cpp (99%) rename libc/utils/gpu/loader/Main.cpp => llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp (76%) rename libc/utils/gpu/loader/Loader.h => llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h (93%) rename libc/utils/gpu/loader/nvptx/nvptx-loader.cpp => llvm/tools/llvm-gpu-loader/nvptx.cpp (98%) diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index ad39ff6fbcb1e..b264dcb4974c7 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -59,13 +59,6 @@ set(LIBC_NAMESPACE ${default_namespace} CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'." ) -# We will build the GPU utilities if we are not doing a runtimes build. 
-option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF) -if(LIBC_BUILD_GPU_LOADER OR ((NOT LLVM_RUNTIMES_BUILD) AND LLVM_LIBC_GPU_BUILD)) - add_subdirectory(utils/gpu) - return() -endif() - option(LIBC_CMAKE_VERBOSE_LOGGING "Log details warnings and notifications during CMake configuration." OFF) diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt deleted file mode 100644 index e529646a1206e..0000000000000 --- a/libc/utils/gpu/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(loader) diff --git a/libc/utils/gpu/loader/CMakeLists.txt b/libc/utils/gpu/loader/CMakeLists.txt deleted file mode 100644 index 9b3bd009dc0f1..0000000000000 --- a/libc/utils/gpu/loader/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -add_library(gpu_loader OBJECT Main.cpp) - -include(FindLibcCommonUtils) -target_link_libraries(gpu_loader PUBLIC llvm-libc-common-utilities) - -target_include_directories(gpu_loader PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR} - ${LIBC_SOURCE_DIR}/include - ${LIBC_SOURCE_DIR} - ${LLVM_MAIN_INCLUDE_DIR} - ${LLVM_BINARY_DIR}/include -) -if(NOT LLVM_ENABLE_RTTI) - target_compile_options(gpu_loader PUBLIC -fno-rtti) -endif() - -find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm) -if(hsa-runtime64_FOUND) - add_subdirectory(amdgpu) -endif() - -# The CUDA loader requires LLVM to traverse the ELF image for symbols. 
-find_package(CUDAToolkit 11.2 QUIET) -if(CUDAToolkit_FOUND) - add_subdirectory(nvptx) -endif() - -if(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) - add_custom_target(libc.utils.gpu.loader) - add_dependencies(libc.utils.gpu.loader amdhsa-loader) - set_target_properties( - libc.utils.gpu.loader - PROPERTIES - TARGET amdhsa-loader - EXECUTABLE "$<TARGET_FILE:amdhsa-loader>" - ) -elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX) - add_custom_target(libc.utils.gpu.loader) - add_dependencies(libc.utils.gpu.loader nvptx-loader) - set_target_properties( - libc.utils.gpu.loader - PROPERTIES - TARGET nvptx-loader - EXECUTABLE "$<TARGET_FILE:nvptx-loader>" - ) -endif() - -foreach(gpu_loader_tgt amdhsa-loader nvptx-loader) - if(TARGET ${gpu_loader_tgt}) - install(TARGETS ${gpu_loader_tgt} - DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT libc) - endif() -endforeach() diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt deleted file mode 100644 index 17878daf0b6fe..0000000000000 --- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -set(LLVM_LINK_COMPONENTS - BinaryFormat - Object - Option - Support - FrontendOffloading - ) - -add_llvm_executable(amdhsa-loader amdhsa-loader.cpp) -target_link_libraries(amdhsa-loader PRIVATE hsa-runtime64::hsa-runtime64 gpu_loader) diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt deleted file mode 100644 index 42510ac31dad4..0000000000000 --- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -set(LLVM_LINK_COMPONENTS - BinaryFormat - Object - Option - Support - ) - -add_llvm_executable(nvptx-loader nvptx-loader.cpp) -target_link_libraries(nvptx-loader PRIVATE gpu_loader CUDA::cuda_driver) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 18b6ee85fae8d..f9ace9f078e2b 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -210,10 +210,6 @@ if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR 
"${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda") set(LLVM_LIBC_GPU_BUILD ON) endif() -if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS AND LLVM_LIBC_GPU_BUILD) - message(STATUS "Enabling libc project to build libc testing tools") - list(APPEND LLVM_ENABLE_PROJECTS "libc") -endif() # LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the # `LLVM_ENABLE_PROJECTS` CMake cache variable. This exists for diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 136099dc48ab8..51433d1ec9831 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -534,20 +534,6 @@ if(build_runtimes) endif() if(LLVM_LIBC_GPU_BUILD) list(APPEND extra_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON") - if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES) - if(TARGET amdhsa-loader) - list(APPEND extra_cmake_args - "-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>") - list(APPEND extra_deps amdhsa-loader) - endif() - endif() - if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES) - if(TARGET nvptx-loader) - list(APPEND extra_cmake_args - "-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>") - list(APPEND extra_deps nvptx-loader) - endif() - endif() if(TARGET clang-offload-packager) list(APPEND extra_deps clang-offload-packager) endif() diff --git a/llvm/tools/llvm-gpu-loader/CMakeLists.txt b/llvm/tools/llvm-gpu-loader/CMakeLists.txt new file mode 100644 index 0000000000000..4b4a6e72e47ae --- /dev/null +++ b/llvm/tools/llvm-gpu-loader/CMakeLists.txt @@ -0,0 +1,45 @@ +set(LLVM_LINK_COMPONENTS + BinaryFormat + Object + Option + Support + FrontendOffloading +) + +add_llvm_tool(llvm-gpu-loader + llvm-gpu-loader.cpp + + # TODO: We intentionally split this currently due to statically linking the + # GPU runtimes. Dynamically load the dependencies, possibly using the + # LLVM offloading API when it is complete. 
+ PARTIAL_SOURCES_INTENDED + + DEPENDS + intrinsics_gen +) + +# Locate the RPC server handling interface. +include(FindLibcCommonUtils) +target_link_libraries(llvm-gpu-loader PUBLIC llvm-libc-common-utilities) + +# Check for HSA support for targeting AMD GPUs. +find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm) +if(hsa-runtime64_FOUND) + target_sources(llvm-gpu-loader PRIVATE amdhsa.cpp) + target_compile_definitions(llvm-gpu-loader PRIVATE AMDHSA_SUPPORT) + target_link_libraries(llvm-gpu-loader PRIVATE hsa-runtime64::hsa-runtime64) + + # Compatibility with the old amdhsa-loader name. + add_llvm_tool_symlink(amdhsa-loader llvm-gpu-loader) +endif() + +# Check for CUDA support for targeting NVIDIA GPUs. +find_package(CUDAToolkit 11.2 QUIET) +if(CUDAToolkit_FOUND) + target_sources(llvm-gpu-loader PRIVATE nvptx.cpp) + target_compile_definitions(llvm-gpu-loader PRIVATE NVPTX_SUPPORT) + target_link_libraries(llvm-gpu-loader PRIVATE CUDA::cuda_driver) + + # Compatibility with the old nvptx-loader name. 
+ add_llvm_tool_symlink(nvptx-loader llvm-gpu-loader) +endif() diff --git a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp b/llvm/tools/llvm-gpu-loader/amdhsa.cpp similarity index 99% rename from libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp rename to llvm/tools/llvm-gpu-loader/amdhsa.cpp index 00fde147b0abd..f3c8f646b6421 100644 --- a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp +++ b/llvm/tools/llvm-gpu-loader/amdhsa.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "Loader.h" +#include "llvm-gpu-loader.h" #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" @@ -330,9 +330,9 @@ static hsa_status_t hsa_memcpy(void *dst, hsa_agent_t dst_agent, return HSA_STATUS_SUCCESS; } -int load(int argc, const char **argv, const char **envp, void *image, - size_t size, const LaunchParameters &params, - bool print_resource_usage) { +int load_amdhsa(int argc, const char **argv, const char **envp, void *image, + size_t size, const LaunchParameters &params, + bool print_resource_usage) { // Initialize the HSA runtime used to communicate with the device. if (hsa_status_t err = hsa_init()) handle_error(err); diff --git a/libc/utils/gpu/loader/Main.cpp b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp similarity index 76% rename from libc/utils/gpu/loader/Main.cpp rename to llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp index c3aeeffd56368..9b157e3f9dcb1 100644 --- a/libc/utils/gpu/loader/Main.cpp +++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp @@ -6,14 +6,17 @@ // //===----------------------------------------------------------------------===// // -// This file opens a device image passed on the command line and passes it to -// one of the loader implementations for launch. +// This utility is used to launch standard programs onto the GPU in conjunction +// with the LLVM 'libc' project. It is designed to mimic a standard emulator +// workflow, allowing for unit tests to be run on the GPU directly. 
// //===----------------------------------------------------------------------===// -#include "Loader.h" +#include "llvm-gpu-loader.h" #include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" @@ -21,6 +24,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Signals.h" #include "llvm/Support/WithColor.h" +#include "llvm/TargetParser/Triple.h" #include #include @@ -125,12 +129,40 @@ int main(int argc, const char **argv, const char **envp) { strerror(errno))); } - // Drop the loader from the program arguments. LaunchParameters params{threads_x, threads_y, threads_z, blocks_x, blocks_y, blocks_z}; - int ret = load(new_argv.size(), new_argv.data(), envp, - const_cast<char *>(image.getBufferStart()), - image.getBufferSize(), params, print_resource_usage); + + Expected<llvm::object::ELF64LEObjectFile> elf_or_err = + llvm::object::ELF64LEObjectFile::create(image); + if (!elf_or_err) + report_error(std::move(elf_or_err.takeError())); + + int ret = 1; + if (elf_or_err->getArch() == Triple::amdgcn) { +#ifdef AMDHSA_SUPPORT + ret = load_amdhsa(new_argv.size(), new_argv.data(), envp, + const_cast<char *>(image.getBufferStart()), + image.getBufferSize(), params, print_resource_usage); +#else + report_error(createStringError( + "Unsupported architecture; %s", + Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin())); +#endif + } else if (elf_or_err->getArch() == Triple::nvptx64) { +#ifdef NVPTX_SUPPORT + ret = load_nvptx(new_argv.size(), new_argv.data(), envp, + const_cast<char *>(image.getBufferStart()), + image.getBufferSize(), params, print_resource_usage); +#else + report_error(createStringError( + "Unsupported architecture; %s", + Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin())); +#endif + } else { + report_error(createStringError( + "Unsupported architecture; %s", + Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin())); + } if 
(no_parallelism) { if (flock(fd, LOCK_UN) == -1) diff --git a/libc/utils/gpu/loader/Loader.h b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h similarity index 93% rename from libc/utils/gpu/loader/Loader.h rename to llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h index ec05117a041ab..29da395e3bc20 100644 --- a/libc/utils/gpu/loader/Loader.h +++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h @@ -54,9 +54,16 @@ struct end_args_t { /// Generic interface to load the \p image and launch execution of the _start /// kernel on the target device. Copies \p argc and \p argv to the device. /// Returns the final value of the `main` function on the device. -int load(int argc, const char **argv, const char **evnp, void *image, - size_t size, const LaunchParameters &params, - bool print_resource_usage); +#ifdef AMDHSA_SUPPORT +int load_amdhsa(int argc, const char **argv, const char **evnp, void *image, + size_t size, const LaunchParameters &params, + bool print_resource_usage); +#endif +#ifdef NVPTX_SUPPORT +int load_nvptx(int argc, const char **argv, const char **evnp, void *image, + size_t size, const LaunchParameters &params, + bool print_resource_usage); +#endif /// Return \p V aligned "upwards" according to \p Align. 
template <typename V, typename A> inline V align_up(V val, A align) { diff --git a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp b/llvm/tools/llvm-gpu-loader/nvptx.cpp similarity index 98% rename from libc/utils/gpu/loader/nvptx/nvptx-loader.cpp rename to llvm/tools/llvm-gpu-loader/nvptx.cpp index 7d6c176c6f360..f7495605ecc68 100644 --- a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp +++ b/llvm/tools/llvm-gpu-loader/nvptx.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "Loader.h" +#include "llvm-gpu-loader.h" #include "cuda.h" @@ -236,9 +236,9 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server, return CUDA_SUCCESS; } -int load(int argc, const char **argv, const char **envp, void *image, - size_t size, const LaunchParameters &params, - bool print_resource_usage) { +int load_nvptx(int argc, const char **argv, const char **envp, void *image, + size_t size, const LaunchParameters &params, + bool print_resource_usage) { if (CUresult err = cuInit(0)) handle_error(err); // Obtain the first device found on the system.