From 3083f0556ec2ef1c0a00b1c9d88876cd80582d1b Mon Sep 17 00:00:00 2001 From: M Moadeli Date: Wed, 25 Jan 2023 16:26:38 +0000 Subject: [PATCH 1/5] Add support for amd_gpu_gfx1034 to be an acceptable value for -fsycl-targets. --- clang/lib/Driver/ToolChains/SYCL.cpp | 2 ++ clang/test/Driver/sycl-oneapi-gpu.cpp | 3 ++- sycl/doc/UsersManual.md | 1 + sycl/doc/design/DeviceIf.md | 2 ++ .../sycl_ext_oneapi_device_architecture.asciidoc | 5 +++++ .../ext/oneapi/experimental/device_architecture.hpp | 11 +++++++++-- 6 files changed, 21 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 9274c0f046c80..dc87ff3685201 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -693,6 +693,7 @@ StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) { .Case("amd_gpu_gfx1030", "gfx1030") .Case("amd_gpu_gfx1031", "gfx1031") .Case("amd_gpu_gfx1032", "gfx1032") + .Case("amd_gpu_gfx1034", "gfx1034") .Default(""); return Device; } @@ -757,6 +758,7 @@ SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) { .Case("gfx1030", "AMD_GPU_GFX1030") .Case("gfx1031", "AMD_GPU_GFX1031") .Case("gfx1032", "AMD_GPU_GFX1032") + .Case("gfx1034", "AMD_GPU_GFX1034") .Default(""); if (!Ext.empty()) { Macro = "__SYCL_TARGET_"; diff --git a/clang/test/Driver/sycl-oneapi-gpu.cpp b/clang/test/Driver/sycl-oneapi-gpu.cpp index 1ecc17734448e..618b4a5d9bb57 100644 --- a/clang/test/Driver/sycl-oneapi-gpu.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu.cpp @@ -166,12 +166,13 @@ // RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1031 -DMAC_STR=GFX1031 // RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1032 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1032 -DMAC_STR=GFX1032 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1034 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1034 
-DMAC_STR=GFX1034 // MACRO_AMD: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" // MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__" // DEVICE_AMD: clang-offload-wrapper{{.*}} "-compile-opts=--offload-arch=[[DEV_STR]]{{.*}}" // MACRO_AMD: clang{{.*}} "-fsycl-is-host" // MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__" - /// -fsycl-targets=spir64_x86_64 should set a specific macro // RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=MACRO_X86_64 diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 47de3a3c4412b..0a246103b602d 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -92,6 +92,7 @@ and not recommended to use in production environment. * amd_gpu_gfx1030 - AMD GCN GFX10.3 (RDNA 2) architecture * amd_gpu_gfx1031 - GCN GFX10.3 (RDNA 2) architecture * amd_gpu_gfx1032 - GCN GFX10.3 (RDNA 2) architecture + * amd_gpu_gfx1034 - GCN GFX10.3 (RDNA 2) architecture ## Language options diff --git a/sycl/doc/design/DeviceIf.md b/sycl/doc/design/DeviceIf.md index 1c2333e815796..f73645a35c66b 100644 --- a/sycl/doc/design/DeviceIf.md +++ b/sycl/doc/design/DeviceIf.md @@ -108,6 +108,7 @@ recognizes: * `amd_gpu_gfx1030` * `amd_gpu_gfx1031` * `amd_gpu_gfx1032` +* `amd_gpu_gfx1034` The above listed device names may not be mixed with the existing target name `spir64_gen` on the same command line. 
In addition, the user must not pass the @@ -190,6 +191,7 @@ one of the following corresponding C++ macro names: * `__SYCL_TARGET_AMD_GPU_GFX1030__` * `__SYCL_TARGET_AMD_GPU_GFX1031__` * `__SYCL_TARGET_AMD_GPU_GFX1032__` +* `__SYCL_TARGET_AMD_GPU_GFX1034__` If the user invokes the compiler driver with `-fsycl-targets=spir64_x86_64`, the compiler driver must predefine the following C++ macro name: diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 442308c016dd5..0d71f8008d7b8 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -164,6 +164,7 @@ enum class architecture : /* unspecified */ { amd_gpu_gfx1030, amd_gpu_gfx1031, amd_gpu_gfx1032, + amd_gpu_gfx1034, intel_gpu_8_0_0 = intel_gpu_bdw, intel_gpu_9_0_9 = intel_gpu_skl, @@ -476,6 +477,10 @@ of these enumerators, and it provides a brief description of their meanings. |2 |GCN GFX10.3 (RDNA 2) architecture. +|`amd_gpu_gfx1034` +|3 +|GCN GFX10.3 (RDNA 2) architecture. + |=== [NOTE] diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 61472815b404d..eb8df421f594a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -67,6 +67,7 @@ enum class architecture { amd_gpu_gfx1030, amd_gpu_gfx1031, amd_gpu_gfx1032, + amd_gpu_gfx1034, // Update "detail::max_architecture" below if you add new elements here! 
intel_gpu_8_0_0 = intel_gpu_bdw, intel_gpu_9_0_9 = intel_gpu_skl, @@ -88,7 +89,7 @@ enum class architecture { namespace detail { static constexpr ext::oneapi::experimental::architecture max_architecture = - ext::oneapi::experimental::architecture::amd_gpu_gfx1032; + ext::oneapi::experimental::architecture::amd_gpu_gfx1034; #ifndef __SYCL_TARGET_INTEL_X86_64__ #define __SYCL_TARGET_INTEL_X86_64__ 0 @@ -264,6 +265,9 @@ static constexpr ext::oneapi::experimental::architecture max_architecture = #ifndef __SYCL_TARGET_AMD_GPU_GFX1032__ #define __SYCL_TARGET_AMD_GPU_GFX1032__ 0 #endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1034__ +#define __SYCL_TARGET_AMD_GPU_GFX1034__ 0 +#endif // This is true when the translation unit is compiled in AOT mode with target // names that supports the "if_architecture_is" features. If an unsupported @@ -328,7 +332,8 @@ static constexpr bool is_allowable_aot_mode = (__SYCL_TARGET_AMD_GPU_GFX1013__ == 1) || (__SYCL_TARGET_AMD_GPU_GFX1030__ == 1) || (__SYCL_TARGET_AMD_GPU_GFX1031__ == 1) || - (__SYCL_TARGET_AMD_GPU_GFX1032__ == 1); + (__SYCL_TARGET_AMD_GPU_GFX1032__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1034__ == 1); struct IsAOTForArchitectureClass { // Allocate an array of size == size of @@ -451,6 +456,8 @@ struct IsAOTForArchitectureClass { __SYCL_TARGET_AMD_GPU_GFX1031__ == 1; arr[static_cast(arch::amd_gpu_gfx1032)] = __SYCL_TARGET_AMD_GPU_GFX1032__ == 1; + arr[static_cast(arch::amd_gpu_gfx1034)] = + __SYCL_TARGET_AMD_GPU_GFX1034__ == 1; } }; From 5129031904f49caa766a0df89bb7f70e4616d424 Mon Sep 17 00:00:00 2001 From: M Moadeli Date: Wed, 1 Feb 2023 22:58:58 +0000 Subject: [PATCH 2/5] - Change the "Feature test macro" section of the document to follow the wording in the "experimental extension" - Remove `Added in version` for supported architectures. 
--- ...cl_ext_oneapi_device_architecture.asciidoc | 157 +++++++++--------- sycl/include/sycl/feature_test.hpp.in | 2 +- 2 files changed, 79 insertions(+), 80 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 0d71f8008d7b8..9c7cb2043f99b 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -76,13 +76,12 @@ this extension enables such applications. === Feature test macro -This extension provides a feature-test macro as described in the core SYCL -specification. An implementation supporting this extension must predefine the -macro `SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE` to one of the values defined in the -table below. Applications can test for the existence of this macro to -determine if the implementation supports this feature, or applications can test -the macro's value to determine which of the extension's features the -implementation supports. +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* [%header,cols="1,5"] |=== @@ -194,291 +193,291 @@ of these enumerators, and it provides a brief description of their meanings. |Description |`x86_64` -|1 +|- |Any CPU device with the x86_64 instruction set. |`intel_gpu_bdw` -|1 +|- |Broadwell Intel graphics architecture. |`intel_gpu_skl` -|1 +|- |Broadwell Intel graphics architecture. |`intel_gpu_kbl` -|1 +|- |Kaby Lake Intel graphics architecture. |`intel_gpu_cfl` -|1 +|- |Coffee Lake Intel graphics architecture. 
|`intel_gpu_apl` -|1 +|- |Apollo Lake Intel graphics architecture. |`intel_gpu_glk` -|1 +|- |Gemini Lake Intel graphics architecture. |`intel_gpu_whl` -|1 +|- |Whiskey Lake Intel graphics architecture. |`intel_gpu_aml` -|1 +|- |Amber Lake Intel graphics architecture. |`intel_gpu_cml` -|1 +|- |Comet Lake Intel graphics architecture. |`intel_gpu_icllp` -|1 +|- |Ice Lake Intel graphics architecture. |`intel_gpu_ehl` -|1 +|- |Elkhart Lake Intel graphics architecture. |`intel_gpu_tgllp` -|1 +|- |Tiger Lake Intel graphics architecture. |`intel_gpu_rkl` -|1 +|- |Rocket Lake Intel graphics architecture. |`intel_gpu_adl_s` -|1 +|- |Alder Lake S Intel graphics architecture. |`intel_gpu_rpl_s` -|1 +|- |Raptor Lake Intel graphics architecture. |`intel_gpu_adl_p` -|1 +|- |Alder Lake P Intel graphics architecture. |`intel_gpu_adl_n` -|1 +|- |Alder Lake N Intel graphics architecture. |`intel_gpu_dg1` -|1 +|- |DG1 Intel graphics architecture. |`intel_gpu_acm_g10` -|1 +|- |Alchemist G10 Intel graphics architecture. |`intel_gpu_acm_g11` -|1 +|- |Alchemist G11 Intel graphics architecture. |`intel_gpu_acm_g12` -|1 +|- |Alchemist G12 Intel graphics architecture. |`intel_gpu_pvc` -|1 +|- |Ponte Vecchio Intel graphics architecture. |`intel_gpu_8_0_0` -|1 +|- |Alias for `intel_gpu_bdw`. |`intel_gpu_9_0_9` -|1 +|- |Alias for `intel_gpu_skl`. |`intel_gpu_9_1_9` -|1 +|- |Alias for `intel_gpu_kbl`. |`intel_gpu_9_2_9` -|1 +|- |Alias for `intel_gpu_cfl`. |`intel_gpu_9_3_0` -|1 +|- |Alias for `intel_gpu_apl`. |`intel_gpu_9_4_0` -|1 +|- |Alias for `intel_gpu_glk`. |`intel_gpu_9_5_0` -|1 +|- |Alias for `intel_gpu_whl`. |`intel_gpu_9_6_0` -|1 +|- |Alias for `intel_gpu_aml`. |`intel_gpu_9_7_0` -|1 +|- |Alias for `intel_gpu_cml`. |`intel_gpu_11_0_0` -|1 +|- |Alias for `intel_gpu_icllp`. |`intel_gpu_11_2_0` -|1 +|- |Alias for `intel_gpu_ehl`. |`intel_gpu_12_0_0` -|1 +|- |Alias for `intel_gpu_tgllp`. |`intel_gpu_12_10_0` -|1 +|- |Alias for `intel_gpu_dg1`. 
|`nvidia_gpu_sm_50` -|2 +|- |NVIDIA Maxwell architecture (compute capability 5.0). |`nvidia_gpu_sm_52` -|2 +|- |NVIDIA Maxwell architecture (compute capability 5.2). |`nvidia_gpu_sm_53` -|2 +|- |NVIDIA Maxwell architecture (compute capability 5.3). |`nvidia_gpu_sm_60` -|2 +|- |NVIDIA Pascal architecture (compute capability 6.0). |`nvidia_gpu_sm_61` -|2 +|- |NVIDIA Pascal architecture (compute capability 6.1). |`nvidia_gpu_sm_62` -|2 +|- |NVIDIA Pascal architecture (compute capability 6.2). |`nvidia_gpu_sm_70` -|2 +|- |NVIDIA Volta architecture (compute capability 7.0). |`nvidia_gpu_sm_72` -|2 +|- |NVIDIA Volta architecture (compute capability 7.2). |`nvidia_gpu_sm_75` -|2 +|- |NVIDIA Turing architecture (compute capability 7.5). |`nvidia_gpu_sm_80` -|2 +|- |NVIDIA Ampere architecture (compute capability 8.0). |`nvidia_gpu_sm_86` -|2 +|- |NVIDIA Ampere architecture (compute capability 8.6). |`nvidia_gpu_sm_87` -|2 +|- |Jetson/Drive AGX Orin architecture. |`nvidia_gpu_sm_89` -|2 +|- |NVIDIA Ada Lovelace architecture. |`nvidia_gpu_sm_90` -|2 +|- |NVIDIA Hopper architecture. |`amd_gpu_gfx700` -|2 +|- |AMD GCN GFX7 (Sea Islands (CI)) architecture. |`amd_gpu_gfx701` -|2 +|- |AMD GCN GFX7 (Sea Islands (CI)) architecture. |`amd_gpu_gfx702` -|2 +|- |AMD GCN GFX7 (Sea Islands (CI)) architecture. |`amd_gpu_gfx801` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx802` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx803` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx805` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx810` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx900` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx902` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx904` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx906` -|2 +|- |AMD GCN GFX9 (Vega) architecture. 
|`amd_gpu_gfx908` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx90a` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx1010` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1011` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1012` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1013` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1030` -|2 +|- |AMD GCN GFX10.3 (RDNA 2) architecture. |`amd_gpu_gfx1031` -|2 +|- |GCN GFX10.3 (RDNA 2) architecture. |`amd_gpu_gfx1032` -|2 +|- |GCN GFX10.3 (RDNA 2) architecture. |`amd_gpu_gfx1034` -|3 +|- |GCN GFX10.3 (RDNA 2) architecture. |=== diff --git a/sycl/include/sycl/feature_test.hpp.in b/sycl/include/sycl/feature_test.hpp.in index 81dcd9b4bb997..93d7f12207dcd 100644 --- a/sycl/include/sycl/feature_test.hpp.in +++ b/sycl/include/sycl/feature_test.hpp.in @@ -30,7 +30,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { // TODO: Move these feature-test macros to compiler driver. #define SYCL_EXT_INTEL_DEVICE_INFO 6 -#define SYCL_EXT_INTEL_DEVICE_ARCHITECTURE 1 +#define SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE #define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1 #define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1 #define SYCL_EXT_ONEAPI_MATRIX 1 From 7e67dc683796cfe93c929c9561a10216665662eb Mon Sep 17 00:00:00 2001 From: M Moadeli Date: Thu, 2 Feb 2023 14:23:06 +0000 Subject: [PATCH 3/5] - Undo earlier change to Feature test macro part - Adds experimental table instead of version one. 
--- ...cl_ext_oneapi_device_architecture.asciidoc | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 9c7cb2043f99b..2dc67c02d6689 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -76,23 +76,22 @@ this extension enables such applications. === Feature test macro -This is an experimental extension specification, intended to provide early -access to features and gather community feedback. Interfaces defined in this -specification are implemented in {dpcpp}, but they are not finalized and may -change incompatibly in future versions of {dpcpp} without prior notice. -*Shipping software products should not rely on APIs defined in this -specification.* - +This extension provides a feature-test macro as described in the core SYCL + specification. An implementation supporting this extension must predefine the + macro `SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE` to one of the values defined in the + table below. Applications can test for the existence of this macro to + determine if the implementation supports this feature, or applications can test + the macro's value to determine which of the extension's features the + implementation supports. + [%header,cols="1,5"] |=== |Value |Description |1 -|Initial version of this extension. - -|2 -|NVIDIA and AMD architectures are added. +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. 
|=== === New enumeration of architectures From 74b96186366745fbbf8d8e6d63f51d3a668bfa01 Mon Sep 17 00:00:00 2001 From: M Moadeli Date: Thu, 2 Feb 2023 14:31:47 +0000 Subject: [PATCH 4/5] Minor fix to doc --- .../sycl_ext_oneapi_device_architecture.asciidoc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 2dc67c02d6689..50f4223ef9473 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -77,12 +77,12 @@ this extension enables such applications. === Feature test macro This extension provides a feature-test macro as described in the core SYCL - specification. An implementation supporting this extension must predefine the - macro `SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE` to one of the values defined in the - table below. Applications can test for the existence of this macro to - determine if the implementation supports this feature, or applications can test - the macro's value to determine which of the extension's features the - implementation supports. +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE` to one of the values defined in the +table below. Applications can test for the existence of this macro to +determine if the implementation supports this feature, or applications can test +the macro's value to determine which of the extension's features the +implementation supports. [%header,cols="1,5"] |=== From 26428a04e0cbac00cfba99f4d8ffe0c884506fd1 Mon Sep 17 00:00:00 2001 From: M Moadeli Date: Fri, 3 Feb 2023 15:06:21 +0000 Subject: [PATCH 5/5] Define SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE to 1. 
--- sycl/include/sycl/feature_test.hpp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/feature_test.hpp.in b/sycl/include/sycl/feature_test.hpp.in index 93d7f12207dcd..9b5d74d2ee13a 100644 --- a/sycl/include/sycl/feature_test.hpp.in +++ b/sycl/include/sycl/feature_test.hpp.in @@ -30,7 +30,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { // TODO: Move these feature-test macros to compiler driver. #define SYCL_EXT_INTEL_DEVICE_INFO 6 -#define SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE +#define SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE 1 #define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1 #define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1 #define SYCL_EXT_ONEAPI_MATRIX 1