From 3668c0a7284d0d19a913476b2847fcc6b657d848 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Tue, 13 Aug 2024 14:39:53 +0100 Subject: [PATCH 1/5] Create one bitcode library for NVPTX Create one single bitcode library for NVPTX by compiling each libdev file into bitcode first, linking these together and running opt on them. Strip away metadata by reusing prepare_builtins from libclc. Remove NVPTX bundles from the libdev object files and remove any unbundling action spawned by the Clang driver for the SYCL toolchain when compiling for the NVPTX backend. Make the driver link against the single bitcode libraries for NVPTX for the SYCL toolchain when device library linkage is not excluded. Ensure that the clang tests check for the correctness of the new clang driver actions and check if the driver still links the device code against the itt device libraries when device library linkage has been excluded. Refactor SYCLLibdevice.cmake by creating functions and grouping e.g. the same compilation flags for a filetype together in one variable. Reuse these variables and call functions to remove redundancies. 
--- clang/lib/Driver/Driver.cpp | 7 +- clang/lib/Driver/ToolChains/SYCL.cpp | 33 +- .../test/CodeGenSYCL/sycl-libdevice-cmath.cpp | 2 +- clang/test/Driver/sycl-offload-nvptx.cpp | 76 +- libclc/CMakeLists.txt | 2 +- libclc/cmake/modules/AddLibclc.cmake | 91 +- libdevice/CMakeLists.txt | 2 + libdevice/cmake/modules/SYCLLibdevice.cmake | 861 +++++++++--------- 8 files changed, 538 insertions(+), 536 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f8f2ff94e2a2c..7c31a980d4a5b 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final { ++NumOfDeviceLibLinked; Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibName)); - if (TC->getTriple().isNVPTX() || - (TC->getTriple().isSPIR() && - TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_fpga)) { + if (TC->getTriple().isSPIR() && + TC->getTriple().getSubArch() == + llvm::Triple::SPIRSubArch_fpga) { auto *SYCLDeviceLibsInputAction = C.MakeAction(*InputArg, types::TY_Object); auto *SYCLDeviceLibsUnbundleAction = diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 37589f00c4140..dc1916cf6c1d5 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -212,6 +212,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, SmallVector LibraryList; const llvm::opt::ArgList &Args = C.getArgs(); + // For NVPTX we only use one single bitcode library and ignore + // manually specified SYCL device libraries. 
+ bool IgnoreSingleLibs = TargetTriple.isNVPTX(); + struct DeviceLibOptInfo { StringRef DeviceLibName; StringRef DeviceLibOption; @@ -233,10 +237,13 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) NoDeviceLibs = true; + bool PrintUnusedLibWarning = false; for (StringRef Val : A->getValues()) { if (Val == "all") { for (const auto &K : DeviceLibLinkInfo.keys()) - DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal"); + DeviceLibLinkInfo[K] = (!IgnoreSingleLibs && !NoDeviceLibs) || + (K == "internal" && NoDeviceLibs); + PrintUnusedLibWarning = false; break; } auto LinkInfoIter = DeviceLibLinkInfo.find(Val); @@ -247,10 +254,21 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } - DeviceLibLinkInfo[Val] = true && !NoDeviceLibs; + DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs; + PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs; } + if (PrintUnusedLibWarning) + C.getDriver().Diag(diag::warn_ignored_clang_option) + << A->getSpelling() << A->getAsString(Args); } } + + if (TargetTriple.isNVPTX() && !NoDeviceLibs) + LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); + + if (IgnoreSingleLibs && !NoDeviceLibs) + return LibraryList; + using SYCLDeviceLibsList = SmallVector; const SYCLDeviceLibsList SYCLDeviceWrapperLibs = { @@ -304,10 +322,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); StringRef LibSuffix = ".bc"; - if (TargetTriple.isNVPTX() || - (TargetTriple.isSPIR() && - TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)) - // For NVidia or FPGA, we are unbundling objects. 
+ if (TargetTriple.isSPIR() && + TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) + // For FPGA, we are unbundling objects. LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o"; if (IsNewOffload) // For new offload model, we use packaged .bc files. @@ -323,7 +340,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, }; addLibraries(SYCLDeviceWrapperLibs); - if (IsSpirvAOT || TargetTriple.isNVPTX()) + if (IsSpirvAOT) addLibraries(SYCLDeviceFallbackLibs); bool NativeBfloatLibs; @@ -551,7 +568,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand( this->getToolChain().getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga; StringRef LibPostfix = ".bc"; - if (IsNVPTX || IsFPGA) { + if (IsFPGA) { LibPostfix = ".o"; if (HostTC->getTriple().isWindowsMSVCEnvironment() && C.getDriver().IsCLMode()) diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp index af94dada263d1..2a2043ac5dc55 100644 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp @@ -5,7 +5,7 @@ // intrinsics. This allows the driver to link in the libdevice definitions for // cosf etc. later in the driver flow. 
-// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s #include "Inputs/sycl.hpp" diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index db5e024363b02..324c5aa9cdd0e 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -53,28 +53,22 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 14: clang-offload-unbundler, {13}, object -// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object -// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 26: 
linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object -// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object +// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl) // /// Check phases specifying a compute capability. 
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ @@ -97,28 +91,22 @@ // CHK-PHASES: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35) -// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object -// CHK-PHASES: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object -// CHK-PHASES: 14: clang-offload-unbundler, {13}, object -// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object -// CHK-PHASES: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35) - // CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35) -// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35) -// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35) -// CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35) -// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35) -// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35) -// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object -// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 12: 
input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35) +// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35) +// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35) +// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35) +// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35) +// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35) +// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object +// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl) /// Check calling preprocessor only // RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \ diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index f05492d777977..dff0f66ba25b9 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL ) foreach( file subnormal_use_default subnormal_disable ) link_bc( TARGET ${file} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll ) install( FILES $ ARCHIVE @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Enable SPIR-V builtin function declarations, so they don't # have to be explicity declared in the soruce. 
list( APPEND flags -Xclang -fdeclare-spirv-builtins) - set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" ) file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} ) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 2a843dd67fb8c..4711b9eb3e3b8 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -86,12 +86,15 @@ endfunction() # Custom target to create # * INPUT ... # List of bytecode files to link together +# * RSP_DIR +# Directory where a response file should be placed +# (Only needed for WIN32 or CYGWIN) # * DEPENDENCIES ... # List of extra dependencies to inject function(link_bc) cmake_parse_arguments(ARG "" - "TARGET" + "TARGET;RSP_DIR" "INPUTS;DEPENDENCIES" ${ARGN} ) @@ -100,7 +103,7 @@ function(link_bc) if( WIN32 OR CYGWIN ) # Create a response file in case the number of inputs exceeds command-line # character limits on certain platforms. - file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE ) + file( TO_CMAKE_PATH ${ARG_RSP_DIR}/${ARG_TARGET}.rsp RSP_FILE ) # Turn it into a space-separate list of input files list( JOIN ARG_INPUTS " " RSP_INPUT ) file( WRITE ${RSP_FILE} ${RSP_INPUT} ) @@ -216,6 +219,50 @@ function(add_libclc_alias alias target) endfunction(add_libclc_alias alias target) +# Runs opt and prepare-builtins on a bitcode file specified by lib_tgt +# +# ARGUMENTS: +# * LIB_TGT string +# Target name that becomes dependent on the out file named LIB_TGT.bc +# * IN_FILE string +# Target name of the input bytecode file +# * OUT_DIR string +# Name of the directory where the output should be placed +# * DEPENDENCIES ... 
+# List of extra dependencies to inject +function(process_bc out_file) + cmake_parse_arguments(ARG + "" + "LIB_TGT;IN_FILE;OUT_DIR" + "OPT_FLAGS;DEPENDENCIES" + ${ARGN}) + add_custom_command( OUTPUT ${ARG_LIB_TGT}.bc + COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${ARG_LIB_TGT}.bc + ${ARG_IN_FILE} + DEPENDS ${opt_target} ${ARG_IN_FILE} ${ARG_DEPENDENCIES} + ) + add_custom_target( ${ARG_LIB_TGT} + ALL DEPENDS ${ARG_LIB_TGT}.bc + ) + set_target_properties( ${ARG_LIB_TGT} + PROPERTIES TARGET_FILE ${ARG_LIB_TGT}.bc + ) + + set( builtins_opt_lib $ ) + + # Add prepare target + add_custom_command( OUTPUT ${ARG_OUT_DIR}/${out_file} + COMMAND ${prepare_builtins_exe} -o ${ARG_OUT_DIR}/${out_file} + ${builtins_opt_lib} + DEPENDS ${builtins_opt_lib} ${ARG_LIB_TGT} ${prepare_builtins_target} ) + add_custom_target( prepare-${out_file} ALL + DEPENDS ${ARG_OUT_DIR}/${out_file} + ) + set_target_properties( prepare-${out_file} + PROPERTIES TARGET_FILE ${ARG_OUT_DIR}/${out_file} + ) +endfunction() + # add_libclc_builtin_set(arch_suffix # TRIPLE string # Triple used to compile @@ -291,44 +338,28 @@ macro(add_libclc_builtin_set arch_suffix) link_bc( TARGET ${builtins_link_lib_tgt} INPUTS ${bytecode_files} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} DEPENDENCIES ${builtins_comp_lib_tgt} ) set( builtins_link_lib $ ) - set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) + add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + DEPENDS ${builtins_link_lib} prepare_builtins ) - # Add opt target - add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc - COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc - ${builtins_link_lib} - DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( ${builtins_opt_lib_tgt} - ALL DEPENDS ${builtins_opt_lib_tgt}.bc - ) - set_target_properties( ${builtins_opt_lib_tgt} - PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc - ) + set( 
builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - set( builtins_opt_lib $ ) - - # Add prepare target - set( obj_suffix ${arch_suffix}.bc ) - add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} - COMMAND ${prepare_builtins_exe} -o ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) - add_custom_target( prepare-${obj_suffix} ALL - DEPENDS ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) - set_target_properties( prepare-${obj_suffix} - PROPERTIES TARGET_FILE ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) + process_bc(${arch_suffix}.bc + LIB_TGT ${builtins_opt_lib_tgt} + IN_FILE ${builtins_link_lib} + OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + OPT_FLAGS ${ARG_OPT_FLAGS} + DEPENDENCIES ${builtins_link_lib_tgt}) # Add dependency to top-level pseudo target to ease making other # targets dependent on libclc. + set( obj_suffix ${arch_suffix}.bc ) add_dependencies(${ARG_PARENT_TARGET} prepare-${obj_suffix}) set( builtins_lib $ ) diff --git a/libdevice/CMakeLists.txt b/libdevice/CMakeLists.txt index b6078f9a44cf8..564391547677f 100644 --- a/libdevice/CMakeLists.txt +++ b/libdevice/CMakeLists.txt @@ -1,6 +1,8 @@ # Utility project providing various functionalities for SPIR-V devices # without native support of these functionalities. 
+include(${CMAKE_CURRENT_SOURCE_DIR}/../libclc/cmake/modules/AddLibclc.cmake) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index c1aac6d017eff..f43c668ad9188 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -1,26 +1,31 @@ set(obj_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(obj_new_offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") +set(obj-new-offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") if (MSVC) - set(lib-suffix obj) - set(new-offload-lib-suffix new.obj) + set(obj-suffix obj) + set(obj-new-offload-suffix new.obj) set(spv_binary_dir "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") set(install_dest_spv bin) - set(devicelib_host_static sycl-devicelib-host.lib) - set(devicelib_host_static_new_offload sycl-devicelib-host.new.lib) + set(devicelib_host_static_obj sycl-devicelib-host.lib) + set(devicelib_host_static_obj-new-offload sycl-devicelib-host.new.lib) else() - set(lib-suffix o) - set(new-offload-lib-suffix new.o) + set(obj-suffix o) + set(obj-new-offload-suffix new.o) set(spv_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") set(install_dest_spv lib${LLVM_LIBDIR_SUFFIX}) - set(devicelib_host_static libsycl-devicelib-host.a) - set(devicelib_host_static_new_offload libsycl-devicelib-host.new.a) + set(devicelib_host_static_obj libsycl-devicelib-host.a) + set(devicelib_host_static_obj-new-offload libsycl-devicelib-host.new.a) endif() +set(spv-suffix spv) +set(bc-suffix bc) set(bc_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(install_dest_lib lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj-new-offload lib${LLVM_LIBDIR_SUFFIX}) set(install_dest_bc lib${LLVM_LIBDIR_SUFFIX}) set(clang $) set(llvm-ar $) +set(llvm-link $) +set(llvm-opt $) string(CONCAT sycl_targets_opt "-fsycl-targets=" @@ -46,112 
+51,142 @@ if (NOT SYCL_LIBDEVICE_GCC_TOOLCHAIN STREQUAL "") list(APPEND compile_opts "--gcc-toolchain=${SYCL_LIBDEVICE_GCC_TOOLCHAIN}") endif() -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") - list(APPEND compile_opts - "-fno-sycl-libspirv" - "-fno-bundle-offload-arch" - "-nocudalib" - "--cuda-gpu-arch=sm_50") -endif() - if (WIN32) list(APPEND compile_opts -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) list(APPEND compile_opts -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) endif() -add_custom_target(libsycldevice-obj) -add_custom_target(libsycldevice-obj-new-offload) -add_custom_target(libsycldevice-spv) -add_custom_target(libsycldevice-bc) - -add_custom_target(libsycldevice DEPENDS - libsycldevice-obj - libsycldevice-bc - libsycldevice-obj-new-offload - libsycldevice-spv) - -function(add_devicelib_obj obj_filename) - cmake_parse_arguments(OBJ "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-obj-file ${obj_binary_dir}/${obj_filename}.${lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - set(devicelib-obj-target ${obj_filename}-obj) - add_custom_target(${devicelib-obj-target} DEPENDS ${devicelib-obj-file}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target}) - install(FILES ${devicelib-obj-file} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) - - set(devicelib-obj-file-new-offload ${obj_new_offload_binary_dir}/${obj_filename}.${new-offload-lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file-new-offload} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file-new-offload} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - 
set(devicelib-obj-target-new-offload ${obj_filename}-new-offload-obj) - add_custom_target(${devicelib-obj-target-new-offload} DEPENDS ${devicelib-obj-file-new-offload}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target-new-offload}) - install(FILES ${devicelib-obj-file-new-offload} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) -endfunction() +add_custom_target(libsycldevice) + +set(filetypes obj obj-new-offload spv bc) + +foreach(filetype IN LISTS filetypes) + add_custom_target(libsycldevice-${filetype}) + add_dependencies(libsycldevice libsycldevice-${filetype}) +endforeach() -function(add_devicelib_spv spv_filename) - cmake_parse_arguments(SPV "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-spv-file ${spv_binary_dir}/${spv_filename}.spv) - add_custom_command(OUTPUT ${devicelib-spv-file} - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} ${SPV_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${SPV_SRC} - -o ${devicelib-spv-file} - MAIN_DEPENDENCY ${SPV_SRC} - DEPENDS ${SPV_DEP} - VERBATIM) - set(devicelib-spv-target ${spv_filename}-spv) - add_custom_target(${devicelib-spv-target} DEPENDS ${devicelib-spv-file}) - add_dependencies(libsycldevice-spv ${devicelib-spv-target}) - install(FILES ${devicelib-spv-file} - DESTINATION ${install_dest_spv} - COMPONENT libsycldevice) +# For NVPTX each device library is compiled into a single bitcode +# file and all files created this way are linked into one large bitcode +# library. +# Additional compilation options are needed for compiling each device library. 
+set(devicelib_arch) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND devicelib_arch cuda) + set(compile_opts_cuda "-fsycl-targets=nvptx64-nvidia-cuda" + "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") + set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false") +endif() + +set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv) +set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir) +set(obj-new-offload_device_compile_opts -fsycl -c --offload-new-driver + -foffload-lto=thin ${sycl_targets_opt}) +set(obj_device_compile_opts -fsycl -c ${sycl_targets_opt}) + +# Compiles and installs a single device library. +# +# Arguments: +# * FILETYPE +# Specifies the output file type of the compilation and its respective +# installation directory. +# Adds a new target that the libsycldevice-FILETYPE target will depend on. +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling. 
+function(compile_lib filename) + cmake_parse_arguments(ARG + "" + "FILETYPE" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + set(devicelib-file + ${${ARG_FILETYPE}_binary_dir}/${filename}.${${ARG_FILETYPE}-suffix}) + + add_custom_command( + OUTPUT ${devicelib-file} + COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + ${CMAKE_CURRENT_SOURCE_DIR}/${ARG_SRC} -o ${devicelib-file} + MAIN_DEPENDENCY ${ARG_SRC} + DEPENDS ${ARG_DEPENDENCIES} + VERBATIM + ) + set(devicelib-${ARG_FILETYPE}-target ${filename}-${ARG_FILETYPE}) + add_custom_target(${devicelib-${ARG_FILETYPE}-target} + DEPENDS ${devicelib-file}) + add_dependencies(libsycldevice-${ARG_FILETYPE} + ${devicelib-${ARG_FILETYPE}-target}) + + install( FILES ${devicelib-file} + DESTINATION ${install_dest_${ARG_FILETYPE}} + COMPONENT libsycldevice) endfunction() -function(add_devicelib_bc bc_filename) - cmake_parse_arguments(BC "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-bc-file ${bc_binary_dir}/${bc_filename}.bc) - add_custom_command(OUTPUT ${devicelib-bc-file} - COMMAND ${clang} -fsycl-device-only - -fsycl-device-obj=llvmir ${compile_opts} - ${BC_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${BC_SRC} - -o ${devicelib-bc-file} - MAIN_DEPENDENCY ${BC_SRC} - DEPENDS ${BC_DEP} - VERBATIM) - set(devicelib-bc-target ${bc_filename}-bc) - add_custom_target(${devicelib-bc-target} DEPENDS ${devicelib-bc-file}) - add_dependencies(libsycldevice-bc ${devicelib-bc-target}) - install(FILES ${devicelib-bc-file} - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) +# Appends a list to a global property. +# +# Arguments: +# * PROPERTY_NAME +# The name of the property to append to. 
+function(append_to_property list) + cmake_parse_arguments(ARG + "" + "PROPERTY_NAME" + "" + ${ARGN}) + get_property(new_property GLOBAL PROPERTY ${ARG_PROPERTY_NAME}) + list(APPEND new_property ${list}) + set_property(GLOBAL PROPERTY ${ARG_PROPERTY_NAME} ${new_property}) endfunction() -function(add_devicelib filename) - cmake_parse_arguments(DL "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - add_devicelib_spv(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_bc(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_obj(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) +# Creates device libraries for all filetypes. +# Adds bitcode library files additionally for each devicelib_arch target and +# adds the created file to an arch specific global property. +# +# Arguments: +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling. 
+function(add_devicelibs filename) + cmake_parse_arguments(ARG + "" + "" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + foreach(filetype IN LISTS filetypes) + compile_lib(${filename} + FILETYPE ${filetype} + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) + endforeach() + + foreach(arch IN LISTS devicelib_arch) + compile_lib(${filename}--${arch} + FILETYPE bc + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${bc_device_compile_opts} + ${compile_opts_${arch}}) + + append_to_property(${bc_binary_dir}/${filename}--${arch}.bc + PROPERTY_NAME BC_DEVICE_LIBS_${arch}) + endforeach() endfunction() +# Set up the dependency lists for the libdevice libraries set(crt_obj_deps wrapper.h device.h spirv_vars.h sycl-compiler) set(complex_obj_deps device_complex.h device.h sycl-compiler) set(cmath_obj_deps device_math.h device.h sycl-compiler) @@ -174,37 +209,91 @@ if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) # Include NativeCPU UR adapter path to enable finding header file with state struct. # libsycl-nativecpu_utils is only needed as BC file by NativeCPU. 
# Todo: add versions for other targets (for cross-compilation) - add_devicelib_bc(libsycl-nativecpu_utils SRC nativecpu_utils.cpp DEP ${itt_obj_deps} EXTRA_ARGS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu) + compile_lib(libsycl-nativecpu_utils + FILETYPE bc + SRC nativecpu_utils.cpp + DEPENDENCIES ${itt_obj_deps} + EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only + -fsycl-device-obj=llvmir) endif() -add_devicelib(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp DEP ${itt_obj_deps}) - -add_devicelib(libsycl-crt SRC crt_wrapper.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-complex SRC complex_wrapper.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp DEP ${complex_obj_deps} ) -add_devicelib(libsycl-cmath SRC cmath_wrapper.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp DEP ${cmath_obj_deps} ) -add_devicelib(libsycl-imf SRC imf_wrapper.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} ) +# Add all device libraries for each filetype except for the Intel math function +# ones. 
+add_devicelibs(libsycl-itt-stubs + SRC itt_stubs.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-compiler-wrappers + SRC itt_compiler_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-user-wrappers + SRC itt_user_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) + +add_devicelibs(libsycl-crt + SRC crt_wrapper.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-complex + SRC complex_wrapper.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-complex-fp64 + SRC complex_wrapper_fp64.cpp + DEPENDENCIES ${complex_obj_deps} ) +add_devicelibs(libsycl-cmath + SRC cmath_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-cmath-fp64 + SRC cmath_wrapper_fp64.cpp + DEPENDENCIES ${cmath_obj_deps} ) +add_devicelibs(libsycl-imf + SRC imf_wrapper.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-fp64 + SRC imf_wrapper_fp64.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-bf16 + SRC imf_wrapper_bf16.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-bfloat16 + SRC bfloat16_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) if(MSVC) - add_devicelib(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) + add_devicelibs(libsycl-msvc-math + SRC msvc_math.cpp + DEPENDENCIES ${cmath_obj_deps}) else() - add_devicelib(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) + add_devicelibs(libsycl-sanitizer + SRC sanitizer_utils.cpp + DEPENDENCIES ${sanitizer_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) endif() -add_devicelib(libsycl-fallback-cassert SRC fallback-cassert.cpp DEP ${crt_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) -add_devicelib(libsycl-fallback-cstring SRC fallback-cstring.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-fallback-complex SRC fallback-complex.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp DEP ${complex_obj_deps} ) 
-add_devicelib(libsycl-fallback-cmath SRC fallback-cmath.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp DEP ${bfloat16_obj_deps}) -add_devicelib(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp DEP ${bfloat16_obj_deps}) - +add_devicelibs(libsycl-fallback-cassert + SRC fallback-cassert.cpp + DEPENDENCIES ${crt_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) +add_devicelibs(libsycl-fallback-cstring + SRC fallback-cstring.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-fallback-complex + SRC fallback-complex.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-complex-fp64 + SRC fallback-complex-fp64.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-cmath + SRC fallback-cmath.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-cmath-fp64 + SRC fallback-cmath-fp64.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-bfloat16 + SRC fallback-bfloat16.cpp + DEPENDENCIES ${bfloat16_obj_deps}) +add_devicelibs(libsycl-native-bfloat16 + SRC bfloat16_wrapper.cpp + DEPENDENCIES ${bfloat16_obj_deps}) + +# Create dependency and source lists for Intel math function libraries. file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) set(imf_fallback_src_dir ${obj_binary_dir}/libdevice) set(imf_src_dir ${CMAKE_CURRENT_SOURCE_DIR}) @@ -217,8 +306,7 @@ set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op. 
imf/imf_inline_fp32.cpp imf/imf_fp32_dl.cpp) set(imf_fallback_fp64_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op.hpp imf_impl_utils.hpp - imf_utils/double_convert.cpp - imf_utils/fp64_round.cpp + imf_utils/double_convert.cpp imf_utils/fp64_round.cpp imf/imf_inline_fp64.cpp imf/imf_fp64_dl.cpp) set(imf_fallback_bf16_deps device.h device_imf.hpp imf_bf16.hpp @@ -275,320 +363,197 @@ if (NOT WIN32) add_imf_host_cxx_flags_compile_flags_if_supported("-fcf-protection=full") endif() -add_custom_command(OUTPUT ${imf_fp32_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP32 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp32_deps}) - -add_custom_command(OUTPUT ${imf_fp64_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP64 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp64_deps}) - -add_custom_command(OUTPUT ${imf_bf16_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=BF16 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_bf16_deps}) - -add_custom_target(get_imf_fallback_fp32 DEPENDS ${imf_fp32_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf.spv - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf.bc - DEPENDS 
${imf_fallback_fp32_deps} get_imf_fallback_fp32 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_fp64 DEPENDS ${imf_fp64_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - 
VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_bf16 DEPENDS ${imf_bf16_fallback_src}) -add_custom_command(OUTPUT 
${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I 
${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_target(imf_fallback_fp32_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf.spv) -add_custom_target(imf_fallback_fp32_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf.bc) -add_custom_target(imf_fallback_fp32_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix}) -add_custom_target(imf_fallback_fp32_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fallback_fp32_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp32_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp32_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp32_obj) -add_dependencies(libsycldevice-obj imf_fallback_fp32_new_offload_obj) - -add_custom_target(imf_fallback_fp64_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv) -add_custom_target(imf_fallback_fp64_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc) -add_custom_target(imf_fallback_fp64_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix}) -add_custom_target(imf_fallback_fp64_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix}) -add_custom_target(imf_fallback_fp64_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp64_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp64_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp64_obj) 
-add_dependencies(libsycldevice-obj imf_fallback_fp64_new_offload_obj) - -add_custom_target(imf_fallback_bf16_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv) -add_custom_target(imf_fallback_bf16_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc) -add_custom_target(imf_fallback_bf16_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix}) -add_custom_target(imf_fallback_bf16_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix}) -add_custom_target(imf_fallback_bf16_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_bf16_spv) -add_dependencies(libsycldevice-bc imf_fallback_bf16_bc) -add_dependencies(libsycldevice-obj imf_fallback_bf16_obj) -add_dependencies(libsycldevice-obj imf_fallback_bf16_new_offload_obj) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - 
VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_target(imf_fp32_host_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fp64_host_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${lib-suffix}) -add_custom_target(imf_bf16_host_obj DEPENDS ${obj_binary_dir}/imf-bf16-host.${lib-suffix}) - -add_custom_target(imf_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix}) -add_custom_target(imf_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix}) -add_custom_target(imf_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix}) - -add_custom_target(imf_host_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static} - 
${obj_binary_dir}/imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS imf_fp32_host_obj imf_fallback_fp32_host_obj - DEPENDS imf_fp64_host_obj imf_fallback_fp64_host_obj - DEPENDS imf_bf16_host_obj imf_fallback_bf16_host_obj - DEPENDS sycl-compiler - VERBATIM) -add_custom_target(imf_host_new_offload_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static_new_offload}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static_new_offload} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static_new_offload} - ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS imf_fp32_host_new_offload_obj imf_fallback_fp32_host_new_offload_obj - DEPENDS imf_fp64_host_new_offload_obj imf_fallback_fp64_host_new_offload_obj - DEPENDS imf_bf16_host_new_offload_obj imf_fallback_bf16_host_new_offload_obj - DEPENDS sycl-compiler - VERBATIM) -add_dependencies(libsycldevice-obj imf_host_obj) -add_dependencies(libsycldevice-obj imf_host_new_offload_obj) -install(FILES ${spv_binary_dir}/libsycl-fallback-imf.spv - ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DESTINATION ${install_dest_spv} - COMPONENT libsycldevice) - -install(FILES ${bc_binary_dir}/libsycl-fallback-imf.bc - ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) - 
-install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - ${obj_binary_dir}/${devicelib_host_static} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) - -install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - ${obj_binary_dir}/${devicelib_host_static_new_offload} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) +set(obj-new-offload_host_compile_opts ${imf_host_cxx_flags} --offload-new-driver + -foffload-lto=thin) +set(obj_host_compile_opts ${imf_host_cxx_flags}) + +foreach(datatype IN ITEMS fp32 fp64 bf16) + string(TOUPPER ${datatype} upper_datatype) + + add_custom_command( + OUTPUT ${imf_${datatype}_fallback_src} + COMMAND ${CMAKE_COMMAND} + -D SRC_DIR=${imf_src_dir} + -D DEST_DIR=${imf_fallback_src_dir} + -D IMF_TARGET=${upper_datatype} + -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake + DEPENDS ${imf_fallback_${datatype}_deps}) + + add_custom_target(get_imf_fallback_${datatype} + DEPENDS ${imf_${datatype}_fallback_src}) +endforeach() + +# Adds Intel math functions libraries. +# +# Arguments: +# * SRC ... +# Source code files needed for the compilation. +# * DIR +# The directory where the output file should be located in. +# * FTYPE +# Filetype of the output library file (e.g. 'bc'). +# * DTYPE +# The datatype of the library, which determines the input source +# and dependencies of the compilation command. +# * TGT_NAME +# Name of the new target that depends on the compilation of the library. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# +# Depends on the clang target for compiling. 
+function(add_lib_imf name) + cmake_parse_arguments(ARG + "" + "DIR;FTYPE;DTYPE;TGT_NAME" + "EXTRA_OPTS" + ${ARGN}) + + add_custom_command( + OUTPUT ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix} + COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + -I ${CMAKE_CURRENT_SOURCE_DIR}/imf + ${imf_${ARG_DTYPE}_fallback_src} + -o + ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix} + DEPENDS ${imf_fallback_${ARG_DTYPE}_deps} + get_imf_fallback_${ARG_DTYPE} sycl-compiler + VERBATIM) + + add_custom_target(${ARG_TGT_NAME} + DEPENDS ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix}) + + add_dependencies(libsycldevice-${ARG_FTYPE} ${ARG_TGT_NAME}) +endfunction() + +# Add device fallback imf libraries for the SPIRV targets and all filetypes. +foreach(dtype IN ITEMS bf16 fp32 fp64) + foreach(ftype IN LISTS filetypes) + set(libsycl_name libsycl-fallback-imf) + if (NOT (dtype STREQUAL "fp32")) + set(libsycl_name libsycl-fallback-imf-${dtype}) + endif() + set(tgt_name imf_fallback_${dtype}_${ftype}) + + add_lib_imf(${libsycl_name} + DIR ${${ftype}_binary_dir} + FTYPE ${ftype} + DTYPE ${dtype} + EXTRA_OPTS ${${ftype}_device_compile_opts} + TGT_NAME ${tgt_name}) + endforeach() +endforeach() + +# Add device fallback imf libraries for the CUDA target. +# The output files are bitcode. +foreach(arch IN LISTS devicelib_arch) + foreach(dtype IN ITEMS bf16 fp32 fp64) + set(tgt_name imf_fallback_${dtype}_bc_${arch}) + + add_lib_imf(libsycl-fallback-imf-${arch}-${dtype} + ARCH ${arch} + DIR ${bc_binary_dir} + FTYPE bc + DTYPE ${dtype} + EXTRA_OPTS ${bc_device_compile_opts} ${compile_opts_${arch}} + TGT_NAME ${tgt_name}) + + append_to_property( + ${bc_binary_dir}/libsycl-fallback-imf-${arch}-${dtype}.${bc-suffix} + PROPERTY_NAME ${arch}) + endforeach() +endforeach() + +# Create one large bitcode file for the CUDA targets. +# Use all the files collected in the respective global properties. 
+foreach(arch IN LISTS devicelib_arch) + get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch}) + # Link the bitcode files together. + link_bc(TARGET device_lib_device_${arch} + RSP_DIR ${CMAKE_CURRENT_BINARY_DIR} + INPUTS ${BC_DEVICE_LIBS_${arch}}) + set( builtins_link_lib_${arch} + $<TARGET_PROPERTY:device_lib_device_${arch},TARGET_FILE>) + add_dependencies(libsycldevice-bc device_lib_device_${arch}) + set( builtins_opt_lib_tgt_${arch} builtins_${arch}.opt) + + # Run the optimizer on the resulting bitcode file and call prepare_builtins + # on it, which strips away debug and arch information. + process_bc(devicelib--${arch}.bc + LIB_TGT builtins_${arch}.opt + IN_FILE ${builtins_link_lib_${arch}} + OUT_DIR ${bc_binary_dir} + OPT_FLAGS ${opt_flags_${arch}} + DEPENDENCIES device_lib_device_${arch}) + add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc) + set(complete_${arch}_libdev + $<TARGET_PROPERTY:prepare-devicelib--${arch}.bc,TARGET_FILE>) + install( FILES ${complete_${arch}_libdev} + DESTINATION ${install_dest_bc} + COMPONENT libsycldevice) +endforeach() + +# Add host device imf libraries for obj and new offload objects. 
+foreach(dtype IN ITEMS bf16 fp32 fp64) + foreach(ftype IN ITEMS obj obj-new-offload) + set(tgt_name imf_fallback_${dtype}_host_${ftype}) + + add_lib_imf(fallback-imf-${dtype}-host + DIR ${${ftype}_binary_dir} + FTYPE ${ftype} + DTYPE ${dtype} + EXTRA_OPTS ${${ftype}_host_compile_opts} + TGT_NAME ${tgt_name}) + + set(wrapper_name imf_wrapper.cpp) + if (NOT ("${dtype}" STREQUAL "fp32")) + set(wrapper_name imf_wrapper_${dtype}.cpp) + endif() + add_custom_command( + OUTPUT ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix} + COMMAND ${clang} ${${ftype}_host_compile_opts} + ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name} + -o ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix} + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name} + DEPENDS ${imf_obj_deps} + VERBATIM) + + add_custom_target(imf_${dtype}_host_${ftype} DEPENDS + ${obj_binary_dir}/imf-${dtype}-host.${${ftype}-suffix}) + endforeach() +endforeach() + +foreach(ftype IN ITEMS obj obj-new-offload) + add_custom_target(imf_host_${ftype} + DEPENDS ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}}) + add_custom_command( + OUTPUT ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}} + COMMAND ${llvm-ar} rcs + ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}} + ${${ftype}_binary_dir}/imf-fp32-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/fallback-imf-fp32-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/imf-fp64-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/fallback-imf-fp64-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/imf-bf16-host.${${ftype}-suffix} + ${${ftype}_binary_dir}/fallback-imf-bf16-host.${${ftype}-suffix} + DEPENDS imf_fp32_host_${ftype} imf_fallback_fp32_host_${ftype} + DEPENDS imf_fp64_host_${ftype} imf_fallback_fp64_host_${ftype} + DEPENDS imf_bf16_host_${ftype} imf_fallback_bf16_host_${ftype} + DEPENDS sycl-compiler + VERBATIM) + add_dependencies(libsycldevice-obj imf_host_${ftype}) + + install( FILES ${obj_binary_dir}/${devicelib_host_static_${ftype}} + 
+ DESTINATION ${install_dest_obj} + COMPONENT libsycldevice) +endforeach() + +foreach(ftype IN LISTS filetypes) + install( + FILES ${${ftype}_binary_dir}/libsycl-fallback-imf.${${ftype}-suffix} + ${${ftype}_binary_dir}/libsycl-fallback-imf-fp64.${${ftype}-suffix} + ${${ftype}_binary_dir}/libsycl-fallback-imf-bf16.${${ftype}-suffix} + DESTINATION ${install_dest_${ftype}} + COMPONENT libsycldevice) +endforeach() + From 4223cfdf9f21ae1260f23e93b9668ac1979d10cc Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Fri, 23 Aug 2024 10:51:48 +0100 Subject: [PATCH 2/5] Fix subset devicelib linking for NVPTX Ensure that when subsets of devicelibs are excluded or included by -fno-sycl-device-lib or -fsycl-device-lib, the correct libraries are linked. For -fsycl-device-lib this means that regardless of which libraries are specified, the full single devicelib for NVPTX will be used. For -fno-sycl-device-lib it means that its values will be ignored, unless it contains "all", in which case only internal libraries will be linked against. Fix an error when printing a warning that some device libraries have been ignored for -fno-sycl-device-lib. --- clang/lib/Driver/ToolChains/SYCL.cpp | 44 ++++++++++++++++++---------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index dc1916cf6c1d5..a31ee3644cef8 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -221,13 +221,17 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, StringRef DeviceLibOption; }; - bool NoDeviceLibs = false; - // Currently, all SYCL device libraries will be linked by default. Linkage - // of "internal" libraries cannot be affected via -fno-sycl-device-lib. + // Currently, all SYCL device libraries will be linked by default. 
llvm::StringMap DeviceLibLinkInfo = { {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}, {"libimf-fp32", true}, {"libimf-fp64", true}, {"libimf-bf16", true}, {"libm-bfloat16", true}, {"internal", true}}; + + // If -fno-sycl-device-lib is specified, its values will be used to exclude + // linkage of libraries specified by DeviceLibLinkInfo. Linkage of "internal" + // libraries cannot be affected via -fno-sycl-device-lib. + bool ExcludeDeviceLibs = false; + if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ, options::OPT_fno_sycl_device_lib_EQ)) { if (A->getValues().size() == 0) @@ -235,15 +239,24 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, << A->getAsString(Args); else { if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) - NoDeviceLibs = true; + ExcludeDeviceLibs = true; + + // When single libraries are ignored and a subset of library names + // not containing the value "all" is specified by -fno-sycl-device-lib, + // print an unused argument warning. + bool PrintUnusedExcludeWarning = false; - bool PrintUnusedLibWarning = false; for (StringRef Val : A->getValues()) { if (Val == "all") { + PrintUnusedExcludeWarning = false; + + // Make sure that internal libraries are still linked against + // when -fno-sycl-device-lib contains "all" and single libraries + // should be ignored. 
+ IgnoreSingleLibs = IgnoreSingleLibs && !ExcludeDeviceLibs; + for (const auto &K : DeviceLibLinkInfo.keys()) - DeviceLibLinkInfo[K] = (!IgnoreSingleLibs && !NoDeviceLibs) || - (K == "internal" && NoDeviceLibs); - PrintUnusedLibWarning = false; + DeviceLibLinkInfo[K] = (K == "internal") || !ExcludeDeviceLibs; break; } auto LinkInfoIter = DeviceLibLinkInfo.find(Val); @@ -254,20 +267,21 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } - DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs; - PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs; + DeviceLibLinkInfo[Val] = !ExcludeDeviceLibs; + PrintUnusedExcludeWarning = IgnoreSingleLibs && ExcludeDeviceLibs; } - if (PrintUnusedLibWarning) - C.getDriver().Diag(diag::warn_ignored_clang_option) - << A->getSpelling() << A->getAsString(Args); + if (PrintUnusedExcludeWarning) + C.getDriver().Diag(diag::warn_drv_unused_argument) << A->getSpelling(); } } - if (TargetTriple.isNVPTX() && !NoDeviceLibs) + if (TargetTriple.isNVPTX() && IgnoreSingleLibs) { LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); + } - if (IgnoreSingleLibs && !NoDeviceLibs) + if (IgnoreSingleLibs) { return LibraryList; + } using SYCLDeviceLibsList = SmallVector; From e574b9a3df997472af738bf9600ac39bdc76ec11 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 26 Aug 2024 12:58:24 +0100 Subject: [PATCH 3/5] Add device lib linking test Check if device lib flags get treated correctly for NVPTX and that the linking actions for the correct device libraries are generated. 
--- clang/test/Driver/sycl-device-lib-nvptx.cpp | 40 +++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 clang/test/Driver/sycl-device-lib-nvptx.cpp diff --git a/clang/test/Driver/sycl-device-lib-nvptx.cpp b/clang/test/Driver/sycl-device-lib-nvptx.cpp new file mode 100644 index 0000000000000..71cbd0f47e8d1 --- /dev/null +++ b/clang/test/Driver/sycl-device-lib-nvptx.cpp @@ -0,0 +1,40 @@ +// Tests specific to `-fsycl-targets=nvptx64-nvidia-cuda` +// Verify that the correct devicelib linking actions are spawned by the driver. +// Check also if the correct warnings are generated. + +// UNSUPPORTED: system-windows + +// Check if internal libraries are still linked against when linkage of all device libs is manually excluded. +// RUN: %clangxx -ccc-print-phases -std=c++11 \ +// RUN: -fsycl -fno-sycl-device-lib=all \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s + +// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, sm_50) + +// Check that the -fsycl-device-lib flag has no effect when "all" is specified. +// RUN: %clangxx -ccc-print-phases -std=c++11 \ +// RUN: -fsycl -fsycl-device-lib=all \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ALL %s + +// Check that the -fsycl-device-lib flag has no effect when subsets of libs are specified. 
+// RUN: %clangxx -ccc-print-phases -std=c++11 \ +// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ALL %s + +// Check that -fno-sycl-device-lib is ignored when it does not contain "all". +// A warning should be printed that the flag got ignored. +// RUN: %clangxx -ccc-print-phases -std=c++11 \ +// RUN: -fsycl -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s + +// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib=' +// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) +// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, sm_50) + From f9149521cc0a5648522d24f356bfc5df754a5f92 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Tue, 27 Aug 2024 10:42:47 +0100 Subject: [PATCH 4/5] Add more checks to NVPTX device-lib test Ensure that devicelib--cuda.bc is never linked against when -fno-sycl-device-lib contains the value "all". Fix formatting of comments + run lines. --- clang/test/Driver/sycl-device-lib-nvptx.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/clang/test/Driver/sycl-device-lib-nvptx.cpp b/clang/test/Driver/sycl-device-lib-nvptx.cpp index 71cbd0f47e8d1..2525db0b8c44f 100644 --- a/clang/test/Driver/sycl-device-lib-nvptx.cpp +++ b/clang/test/Driver/sycl-device-lib-nvptx.cpp @@ -4,24 +4,28 @@ // UNSUPPORTED: system-windows -// Check if internal libraries are still linked against when linkage of all device libs is manually excluded. 
-// RUN: %clangxx -ccc-print-phases -std=c++11 \ -// RUN: -fsycl -fno-sycl-device-lib=all \ +// Check if internal libraries are still linked against when linkage of all +// device libs is manually excluded. +// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) // CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) // CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) // CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) // CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, sm_50) // Check that the -fsycl-device-lib flag has no effect when "all" is specified. -// RUN: %clangxx -ccc-print-phases -std=c++11 \ -// RUN: -fsycl -fsycl-device-lib=all \ +// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-ALL %s -// Check that the -fsycl-device-lib flag has no effect when subsets of libs are specified. +// Check that the -fsycl-device-lib flag has no effect when subsets of libs +// are specified. 
// RUN: %clangxx -ccc-print-phases -std=c++11 \ // RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ @@ -29,8 +33,8 @@ // Check that -fno-sycl-device-lib is ignored when it does not contain "all". // A warning should be printed that the flag got ignored. -// RUN: %clangxx -ccc-print-phases -std=c++11 \ -// RUN: -fsycl -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ +// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl \ +// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ // RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s From 9a79dea9a4c2e332e1d2885395dd47da1dc0bcb2 Mon Sep 17 00:00:00 2001 From: Martin Wehking Date: Mon, 2 Sep 2024 10:40:49 +0100 Subject: [PATCH 5/5] Reformat clang/lib/Driver/ToolChains/SYCL.cpp Co-authored-by: Michael Toguchi --- clang/lib/Driver/ToolChains/SYCL.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index a31ee3644cef8..6e9d7fee5b961 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -275,13 +275,11 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, } } - if (TargetTriple.isNVPTX() && IgnoreSingleLibs) { + if (TargetTriple.isNVPTX() && IgnoreSingleLibs) LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); - } - if (IgnoreSingleLibs) { + if (IgnoreSingleLibs) return LibraryList; - } using SYCLDeviceLibsList = SmallVector;