diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 9274c0f046c80..dc87ff3685201 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -693,6 +693,7 @@ StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) { .Case("amd_gpu_gfx1030", "gfx1030") .Case("amd_gpu_gfx1031", "gfx1031") .Case("amd_gpu_gfx1032", "gfx1032") + .Case("amd_gpu_gfx1034", "gfx1034") .Default(""); return Device; } @@ -757,6 +758,7 @@ SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) { .Case("gfx1030", "AMD_GPU_GFX1030") .Case("gfx1031", "AMD_GPU_GFX1031") .Case("gfx1032", "AMD_GPU_GFX1032") + .Case("gfx1034", "AMD_GPU_GFX1034") .Default(""); if (!Ext.empty()) { Macro = "__SYCL_TARGET_"; diff --git a/clang/test/Driver/sycl-oneapi-gpu.cpp b/clang/test/Driver/sycl-oneapi-gpu.cpp index 1ecc17734448e..618b4a5d9bb57 100644 --- a/clang/test/Driver/sycl-oneapi-gpu.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu.cpp @@ -166,12 +166,13 @@ // RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1031 -DMAC_STR=GFX1031 // RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1032 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1032 -DMAC_STR=GFX1032 +// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx1034 -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx1034 -DMAC_STR=GFX1034 // MACRO_AMD: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" // MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__" // DEVICE_AMD: clang-offload-wrapper{{.*}} "-compile-opts=--offload-arch=[[DEV_STR]]{{.*}}" // MACRO_AMD: clang{{.*}} "-fsycl-is-host" // MACRO_AMD: "-D__SYCL_TARGET_AMD_GPU_[[MAC_STR]]__" - /// -fsycl-targets=spir64_x86_64 should set a specific macro // RUN: %clangxx -c -fsycl -fsycl-targets=spir64_x86_64 -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=MACRO_X86_64 diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 
47de3a3c4412b..0a246103b602d 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -92,6 +92,7 @@ and not recommended to use in production environment. * amd_gpu_gfx1030 - AMD GCN GFX10.3 (RDNA 2) architecture * amd_gpu_gfx1031 - GCN GFX10.3 (RDNA 2) architecture * amd_gpu_gfx1032 - GCN GFX10.3 (RDNA 2) architecture + * amd_gpu_gfx1034 - AMD GCN GFX10.3 (RDNA 2) architecture ## Language options diff --git a/sycl/doc/design/DeviceIf.md b/sycl/doc/design/DeviceIf.md index 1c2333e815796..f73645a35c66b 100644 --- a/sycl/doc/design/DeviceIf.md +++ b/sycl/doc/design/DeviceIf.md @@ -108,6 +108,7 @@ recognizes: * `amd_gpu_gfx1030` * `amd_gpu_gfx1031` * `amd_gpu_gfx1032` +* `amd_gpu_gfx1034` The above listed device names may not be mixed with the existing target name `spir64_gen` on the same command line. In addition, the user must not pass the @@ -190,6 +191,7 @@ one of the following corresponding C++ macro names: * `__SYCL_TARGET_AMD_GPU_GFX1030__` * `__SYCL_TARGET_AMD_GPU_GFX1031__` * `__SYCL_TARGET_AMD_GPU_GFX1032__` +* `__SYCL_TARGET_AMD_GPU_GFX1034__` If the user invokes the compiler driver with `-fsycl-targets=spir64_x86_64`, the compiler driver must predefine the following C++ macro name: diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc index 442308c016dd5..50f4223ef9473 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc @@ -83,17 +83,15 @@ table below. Applications can test for the existence of this macro to determine if the implementation supports this feature, or applications can test the macro's value to determine which of the extension's features the implementation supports. - + [%header,cols="1,5"] |=== |Value |Description |1 -|Initial version of this extension. 
- -|2 -|NVIDIA and AMD architectures are added. +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. |=== === New enumeration of architectures @@ -164,6 +162,7 @@ enum class architecture : /* unspecified */ { amd_gpu_gfx1030, amd_gpu_gfx1031, amd_gpu_gfx1032, + amd_gpu_gfx1034, intel_gpu_8_0_0 = intel_gpu_bdw, intel_gpu_9_0_9 = intel_gpu_skl, @@ -193,287 +192,291 @@ of these enumerators, and it provides a brief description of their meanings. |Description |`x86_64` -|1 +|- |Any CPU device with the x86_64 instruction set. |`intel_gpu_bdw` -|1 +|- |Broadwell Intel graphics architecture. |`intel_gpu_skl` -|1 +|- |Broadwell Intel graphics architecture. |`intel_gpu_kbl` -|1 +|- |Kaby Lake Intel graphics architecture. |`intel_gpu_cfl` -|1 +|- |Coffee Lake Intel graphics architecture. |`intel_gpu_apl` -|1 +|- |Apollo Lake Intel graphics architecture. |`intel_gpu_glk` -|1 +|- |Gemini Lake Intel graphics architecture. |`intel_gpu_whl` -|1 +|- |Whiskey Lake Intel graphics architecture. |`intel_gpu_aml` -|1 +|- |Amber Lake Intel graphics architecture. |`intel_gpu_cml` -|1 +|- |Comet Lake Intel graphics architecture. |`intel_gpu_icllp` -|1 +|- |Ice Lake Intel graphics architecture. |`intel_gpu_ehl` -|1 +|- |Elkhart Lake Intel graphics architecture. |`intel_gpu_tgllp` -|1 +|- |Tiger Lake Intel graphics architecture. |`intel_gpu_rkl` -|1 +|- |Rocket Lake Intel graphics architecture. |`intel_gpu_adl_s` -|1 +|- |Alder Lake S Intel graphics architecture. |`intel_gpu_rpl_s` -|1 +|- |Raptor Lake Intel graphics architecture. |`intel_gpu_adl_p` -|1 +|- |Alder Lake P Intel graphics architecture. |`intel_gpu_adl_n` -|1 +|- |Alder Lake N Intel graphics architecture. |`intel_gpu_dg1` -|1 +|- |DG1 Intel graphics architecture. |`intel_gpu_acm_g10` -|1 +|- |Alchemist G10 Intel graphics architecture. |`intel_gpu_acm_g11` -|1 +|- |Alchemist G11 Intel graphics architecture. 
|`intel_gpu_acm_g12` -|1 +|- |Alchemist G12 Intel graphics architecture. |`intel_gpu_pvc` -|1 +|- |Ponte Vecchio Intel graphics architecture. |`intel_gpu_8_0_0` -|1 +|- |Alias for `intel_gpu_bdw`. |`intel_gpu_9_0_9` -|1 +|- |Alias for `intel_gpu_skl`. |`intel_gpu_9_1_9` -|1 +|- |Alias for `intel_gpu_kbl`. |`intel_gpu_9_2_9` -|1 +|- |Alias for `intel_gpu_cfl`. |`intel_gpu_9_3_0` -|1 +|- |Alias for `intel_gpu_apl`. |`intel_gpu_9_4_0` -|1 +|- |Alias for `intel_gpu_glk`. |`intel_gpu_9_5_0` -|1 +|- |Alias for `intel_gpu_whl`. |`intel_gpu_9_6_0` -|1 +|- |Alias for `intel_gpu_aml`. |`intel_gpu_9_7_0` -|1 +|- |Alias for `intel_gpu_cml`. |`intel_gpu_11_0_0` -|1 +|- |Alias for `intel_gpu_icllp`. |`intel_gpu_11_2_0` -|1 +|- |Alias for `intel_gpu_ehl`. |`intel_gpu_12_0_0` -|1 +|- |Alias for `intel_gpu_tgllp`. |`intel_gpu_12_10_0` -|1 +|- |Alias for `intel_gpu_dg1`. |`nvidia_gpu_sm_50` -|2 +|- |NVIDIA Maxwell architecture (compute capability 5.0). |`nvidia_gpu_sm_52` -|2 +|- |NVIDIA Maxwell architecture (compute capability 5.2). |`nvidia_gpu_sm_53` -|2 +|- |NVIDIA Maxwell architecture (compute capability 5.3). |`nvidia_gpu_sm_60` -|2 +|- |NVIDIA Pascal architecture (compute capability 6.0). |`nvidia_gpu_sm_61` -|2 +|- |NVIDIA Pascal architecture (compute capability 6.1). |`nvidia_gpu_sm_62` -|2 +|- |NVIDIA Pascal architecture (compute capability 6.2). |`nvidia_gpu_sm_70` -|2 +|- |NVIDIA Volta architecture (compute capability 7.0). |`nvidia_gpu_sm_72` -|2 +|- |NVIDIA Volta architecture (compute capability 7.2). |`nvidia_gpu_sm_75` -|2 +|- |NVIDIA Turing architecture (compute capability 7.5). |`nvidia_gpu_sm_80` -|2 +|- |NVIDIA Ampere architecture (compute capability 8.0). |`nvidia_gpu_sm_86` -|2 +|- |NVIDIA Ampere architecture (compute capability 8.6). |`nvidia_gpu_sm_87` -|2 +|- |Jetson/Drive AGX Orin architecture. |`nvidia_gpu_sm_89` -|2 +|- |NVIDIA Ada Lovelace architecture. |`nvidia_gpu_sm_90` -|2 +|- |NVIDIA Hopper architecture. 
|`amd_gpu_gfx700` -|2 +|- |AMD GCN GFX7 (Sea Islands (CI)) architecture. |`amd_gpu_gfx701` -|2 +|- |AMD GCN GFX7 (Sea Islands (CI)) architecture. |`amd_gpu_gfx702` -|2 +|- |AMD GCN GFX7 (Sea Islands (CI)) architecture. |`amd_gpu_gfx801` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx802` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx803` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx805` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx810` -|2 +|- |AMD GCN GFX8 (Volcanic Islands (VI)) architecture. |`amd_gpu_gfx900` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx902` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx904` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx906` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx908` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx90a` -|2 +|- |AMD GCN GFX9 (Vega) architecture. |`amd_gpu_gfx1010` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1011` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1012` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1013` -|2 +|- |AMD GCN GFX10.1 (RDNA 1) architecture. |`amd_gpu_gfx1030` -|2 +|- |AMD GCN GFX10.3 (RDNA 2) architecture. |`amd_gpu_gfx1031` -|2 +|- |GCN GFX10.3 (RDNA 2) architecture. |`amd_gpu_gfx1032` -|2 +|- +|GCN GFX10.3 (RDNA 2) architecture. + +|`amd_gpu_gfx1034` +|- |GCN GFX10.3 (RDNA 2) architecture. 
|=== diff --git a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp index 61472815b404d..eb8df421f594a 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp @@ -67,6 +67,7 @@ enum class architecture { amd_gpu_gfx1030, amd_gpu_gfx1031, amd_gpu_gfx1032, + amd_gpu_gfx1034, // Update "detail::max_architecture" below if you add new elements here! intel_gpu_8_0_0 = intel_gpu_bdw, intel_gpu_9_0_9 = intel_gpu_skl, @@ -88,7 +89,7 @@ enum class architecture { namespace detail { static constexpr ext::oneapi::experimental::architecture max_architecture = - ext::oneapi::experimental::architecture::amd_gpu_gfx1032; + ext::oneapi::experimental::architecture::amd_gpu_gfx1034; #ifndef __SYCL_TARGET_INTEL_X86_64__ #define __SYCL_TARGET_INTEL_X86_64__ 0 @@ -264,6 +265,9 @@ static constexpr ext::oneapi::experimental::architecture max_architecture = #ifndef __SYCL_TARGET_AMD_GPU_GFX1032__ #define __SYCL_TARGET_AMD_GPU_GFX1032__ 0 #endif +#ifndef __SYCL_TARGET_AMD_GPU_GFX1034__ +#define __SYCL_TARGET_AMD_GPU_GFX1034__ 0 +#endif // This is true when the translation unit is compiled in AOT mode with target // names that supports the "if_architecture_is" features. 
If an unsupported @@ -328,7 +332,8 @@ static constexpr bool is_allowable_aot_mode = (__SYCL_TARGET_AMD_GPU_GFX1013__ == 1) || (__SYCL_TARGET_AMD_GPU_GFX1030__ == 1) || (__SYCL_TARGET_AMD_GPU_GFX1031__ == 1) || - (__SYCL_TARGET_AMD_GPU_GFX1032__ == 1); + (__SYCL_TARGET_AMD_GPU_GFX1032__ == 1) || + (__SYCL_TARGET_AMD_GPU_GFX1034__ == 1); struct IsAOTForArchitectureClass { // Allocate an array of size == size of @@ -451,6 +456,8 @@ struct IsAOTForArchitectureClass { __SYCL_TARGET_AMD_GPU_GFX1031__ == 1; arr[static_cast(arch::amd_gpu_gfx1032)] = __SYCL_TARGET_AMD_GPU_GFX1032__ == 1; + arr[static_cast(arch::amd_gpu_gfx1034)] = + __SYCL_TARGET_AMD_GPU_GFX1034__ == 1; } }; diff --git a/sycl/include/sycl/feature_test.hpp.in b/sycl/include/sycl/feature_test.hpp.in index 81dcd9b4bb997..9b5d74d2ee13a 100644 --- a/sycl/include/sycl/feature_test.hpp.in +++ b/sycl/include/sycl/feature_test.hpp.in @@ -30,7 +30,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { // TODO: Move these feature-test macros to compiler driver. #define SYCL_EXT_INTEL_DEVICE_INFO 6 -#define SYCL_EXT_INTEL_DEVICE_ARCHITECTURE 1 +#define SYCL_EXT_ONEAPI_DEVICE_ARCHITECTURE 1 #define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1 #define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1 #define SYCL_EXT_ONEAPI_MATRIX 1