Skip to content

Commit ce82628

Browse files
committed
[SYCL][ext] Define and Implement sycl_ext_tensor_map
This is a fairly mechanical implementation of the basic infrastructure required to access CUDA TMA descriptors from within SYCL kernels, while initializing them on the host. The new feature exposes two new classes and associated support structure in `sycl::ext::codeplay::experimental::cuda`. There's some ugliness involved to make this work on account of the way NVIDIA implemented this basic feature, but it's all in the name of {legitimate-field-of-endeavour}.
1 parent ff219eb commit ce82628

File tree

9 files changed

+721
-3
lines changed

9 files changed

+721
-3
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

+19-3
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ def AspectExt_oneapi_cuda_cluster_group : Aspect<"ext_oneapi_cuda_cluster_group"
8686
def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">;
8787
def AspectExt_oneapi_atomic16 : Aspect<"ext_oneapi_atomic16">;
8888
def AspectExt_oneapi_virtual_functions : Aspect<"ext_oneapi_virtual_functions">;
89+
def AspectExt_codeplay_cuda_tensor_map : Aspect<"ext_codeplay_cuda_tensor_map">;
90+
8991
// Deprecated aspects
9092
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
9193
def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">;
@@ -150,7 +152,9 @@ def : TargetInfo<"__TestAspectList",
150152
AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem, AspectExt_oneapi_cuda_cluster_group,
151153
AspectExt_intel_fpga_task_sequence,
152154
AspectExt_oneapi_atomic16,
153-
AspectExt_oneapi_virtual_functions],
155+
AspectExt_oneapi_virtual_functions,
156+
AspectExt_codeplay_cuda_tensor_map,
157+
],
154158
[]>;
155159
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
156160
// match.
@@ -259,9 +263,21 @@ def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindles
259263
def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
260264
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
261265
def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
262-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_cuda_cluster_group])>;
266+
[
267+
AspectFp16,
268+
AspectAtomic64,
269+
AspectExt_oneapi_cuda_async_barrier,
270+
AspectExt_oneapi_cuda_cluster_group,
271+
AspectExt_codeplay_cuda_tensor_map,
272+
])>;
263273
def : CudaTargetInfo<"nvidia_gpu_sm_90a", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
264-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_cuda_cluster_group])>;
274+
[
275+
AspectFp16,
276+
AspectAtomic64,
277+
AspectExt_oneapi_cuda_async_barrier,
278+
AspectExt_oneapi_cuda_cluster_group,
279+
AspectExt_codeplay_cuda_tensor_map,
280+
])>;
265281

266282
//
267283
// HIP / AMDGPU device aspects

0 commit comments

Comments
 (0)