From 2b7d50a8a7d52db41465cfc7aefbf77111bfb540 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 1 Mar 2020 10:52:18 +0100 Subject: [PATCH 1/3] [SYCL][CUDA] Add NVPTX64 binary target Add the binary target identifier "nvptx64" for NVIDIA PTX devices. Signed-off-by: Andrea Bocci --- sycl/include/CL/sycl/detail/pi.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 686fdc49f753a..f7441e88b183c 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -432,6 +432,9 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; #define PI_DEVICE_BINARY_TARGET_SPIRV64_GEN "spir64_gen" #define PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA "spir64_fpga" +/// PTX 64-bit image <-> "nvptx64", 64-bit NVIDIA PTX device +#define PI_DEVICE_BINARY_TARGET_NVPTX64 "nvptx64" + /// This struct is a record of the device binary information. If the Kind field /// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec field /// can still be specific and denote e.g. FPGA target. From 48ed377aaa19eca37c7d1c0bcbc932cc4672710d Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Sun, 1 Mar 2020 10:58:43 +0100 Subject: [PATCH 2/3] [SYCL][CUDA] Select only NVPTX64 device binaries Search through the available binary images and select the first one for the PI_DEVICE_BINARY_TARGET_NVPTX64 ("nvptx64") target. Return PI_INVALID_BINARY if no "nvptx64" image is available. Signed-off-by: Andrea Bocci --- sycl/plugins/cuda/pi_cuda.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index fe42b1d8dc3a1..036d691c3619e 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -673,10 +673,10 @@ pi_result cuda_piDevicePartition( return {}; } -pi_result cuda_piextDeviceSelectBinary( - pi_device device, // TODO: does this need to be context? - pi_device_binary *binaries, pi_uint32 num_binaries, - pi_device_binary *selected_binary) { +pi_result cuda_piextDeviceSelectBinary(pi_device device, + pi_device_binary *binaries, + pi_uint32 num_binaries, + pi_device_binary *selected_binary) { if (!binaries) { cl::sycl::detail::pi::die("No list of device images provided"); } @@ -686,8 +686,19 @@ pi_result cuda_piextDeviceSelectBinary( if (!selected_binary) { cl::sycl::detail::pi::die("No storage for device binary provided"); } - *selected_binary = binaries[0]; - return PI_SUCCESS; + + // Look for an image for the NVPTX64 target, and return the first one that is + // found + for (pi_uint32 i = 0; i < num_binaries; i++) { + if (strcmp(binaries[i]->DeviceTargetSpec, + PI_DEVICE_BINARY_TARGET_NVPTX64) == 0) { + *selected_binary = binaries[i]; + return PI_SUCCESS; + } + } + + // No image can be loaded for the given device + return PI_INVALID_BINARY; } pi_result cuda_piextGetDeviceFunctionPointer(pi_device device, From b955e1117226f72995ca55eb02d6be5cbc6d5615 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 2 Mar 2020 14:23:32 +0100 Subject: [PATCH 3/3] [SYCL] LIT test to check for the impact of the order of the -fsycl-targets Add a LIT test to check that both backends (PI_OPENCL, PI_CUDA) work irrespective of the order of the -fsycl-targets=... arguments. Signed-off-by: Andrea Bocci --- sycl/test/plugins/sycl-targets-order.cpp | 70 ++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 sycl/test/plugins/sycl-targets-order.cpp diff --git a/sycl/test/plugins/sycl-targets-order.cpp b/sycl/test/plugins/sycl-targets-order.cpp new file mode 100644 index 0000000000000..d94ef572eff56 --- /dev/null +++ b/sycl/test/plugins/sycl-targets-order.cpp @@ -0,0 +1,70 @@ +// RUN: %clangxx -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice,nvptx64-unknown-unknown-sycldevice %s -o %t-spir64-nvptx64.out +// RUN: env SYCL_BE=PI_OPENCL %t-spir64-nvptx64.out +// RUN: env SYCL_BE=PI_CUDA %t-spir64-nvptx64.out +// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-unknown-unknown-sycldevice,spir64-unknown-unknown-sycldevice %s -o %t-nvptx64-spir64.out +// RUN: env SYCL_BE=PI_OPENCL %t-nvptx64-spir64.out +// RUN: env SYCL_BE=PI_CUDA %t-nvptx64-spir64.out + +// REQUIRES: opencl, cuda + +//==------- sycl-targets-order.cpp - SYCL -fsycl-targets order test --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +int main(int argc, char **argv) { + + // select the default SYCL device + cl::sycl::device device{cl::sycl::default_selector{}}; + std::cout << "Running on SYCL device " << device.get_info() + << ", driver version " << device.get_info() + << std::endl; + + // create a queue + cl::sycl::queue queue{device}; + + // create a buffer of 4 ints to be used inside the kernel code + cl::sycl::buffer buffer(4); + + // size of the index space for the kernel + cl::sycl::range<1> NumOfWorkItems{buffer.get_count()}; + + // submit a command group(work) to the queue + queue.submit([&](cl::sycl::handler &cgh) { + // get write only access to the buffer on a device + auto accessor = buffer.get_access(cgh); + // executing the kernel + cgh.parallel_for( + NumOfWorkItems, [=](cl::sycl::id<1> WIid) { + // fill the buffer with indexes + accessor[WIid] = WIid.get(0); + }); + }); + + // get read-only access to the buffer on the host + // introduce an implicit barrier waiting for queue to complete the work + const auto host_accessor = buffer.get_access(); + + // check the results + bool mismatch = false; + for (unsigned int i = 0; i < buffer.get_count(); ++i) { + if (host_accessor[i] != i) { + std::cout << "The result is incorrect for element: " << i + << " , expected: " << i << " , got: " << host_accessor[i] + << std::endl; + mismatch = true; + } + } + + if (not mismatch) { + std::cout << "The results are correct!" << std::endl; + } + + return mismatch; +}