Skip to content

Commit 1fba3c9

Browse files
committed
[SYCL][ext] Define and Implement sycl_ext_tensor_map
This is a fairly mechanical implementation of the basic infrastructure required to access CUDA TMA descriptors from within SYCL kernels, while initializing them on the host. The new feature exposes two new classes and associated support structure in `sycl::ext::codeplay::experimental::cuda`. There's some ugliness involved to make this work on account of the way NVIDIA implemented this basic feature, but it's all in the name of {legitimate-field-of-endeavour}.
1 parent a1355e8 commit 1fba3c9

File tree

11 files changed

+740
-3
lines changed

11 files changed

+740
-3
lines changed

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

+19-3
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ def AspectExt_oneapi_cuda_cluster_group : Aspect<"ext_oneapi_cuda_cluster_group"
8686
def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">;
8787
def AspectExt_oneapi_atomic16 : Aspect<"ext_oneapi_atomic16">;
8888
def AspectExt_oneapi_virtual_functions : Aspect<"ext_oneapi_virtual_functions">;
89+
def AspectExt_codeplay_cuda_tensor_map : Aspect<"ext_codeplay_cuda_tensor_map">;
90+
8991
// Deprecated aspects
9092
def AspectInt64_base_atomics : Aspect<"int64_base_atomics">;
9193
def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">;
@@ -150,7 +152,9 @@ def : TargetInfo<"__TestAspectList",
150152
AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem, AspectExt_oneapi_cuda_cluster_group,
151153
AspectExt_intel_fpga_task_sequence,
152154
AspectExt_oneapi_atomic16,
153-
AspectExt_oneapi_virtual_functions],
155+
AspectExt_oneapi_virtual_functions,
156+
AspectExt_codeplay_cuda_tensor_map,
157+
],
154158
[]>;
155159
// This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT
156160
// match.
@@ -265,9 +269,21 @@ def : CudaTargetInfo<"nvidia_gpu_sm_87", !listconcat(CudaMinAspects, CudaBindles
265269
def : CudaTargetInfo<"nvidia_gpu_sm_89", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
266270
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier])>;
267271
def : CudaTargetInfo<"nvidia_gpu_sm_90", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
268-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_cuda_cluster_group])>;
272+
[
273+
AspectFp16,
274+
AspectAtomic64,
275+
AspectExt_oneapi_cuda_async_barrier,
276+
AspectExt_oneapi_cuda_cluster_group,
277+
AspectExt_codeplay_cuda_tensor_map,
278+
])>;
269279
def : CudaTargetInfo<"nvidia_gpu_sm_90a", !listconcat(CudaMinAspects, CudaBindlessImagesAspects,
270-
[AspectFp16, AspectAtomic64, AspectExt_oneapi_cuda_async_barrier, AspectExt_oneapi_cuda_cluster_group])>;
280+
[
281+
AspectFp16,
282+
AspectAtomic64,
283+
AspectExt_oneapi_cuda_async_barrier,
284+
AspectExt_oneapi_cuda_cluster_group,
285+
AspectExt_codeplay_cuda_tensor_map,
286+
])>;
271287

272288
//
273289
// HIP / AMDGPU device aspects

0 commit comments

Comments
 (0)