[SYCL][NVPTX] Obey -fcuda-short-ptr when compiling SYCL for NVPTX (#15642)

frasercrmck · web-flow · commit 83fe1c178217 · 2024-12-11T11:27:36.000Z
This flag turns pointers to CUDA's `shared`, `const`, and `local`
address spaces into 32-bit pointers. This can potentially save on
registers used for addressing calculations.

This option was being accepted by the frontend when compiling SYCL code,
but compilation then failed with an error that the backend datalayout
doesn't match the expected target description. This was because the
option wasn't being propagated to all parts of the toolchain, leading to
inconsistencies.

This PR allows users to pass the option if they wish. They will see a
warning that the compiler is linking against a libclc/libspirv that
hasn't been compiled with this option, but this is likely harmless since
libspirv doesn't manipulate pointers.
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8593,7 +8593,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
-  if (IsCuda) {
+  // Propagate -fcuda-short-ptr if compiling CUDA or SYCL for NVPTX
+  if (IsCuda || (IsSYCLDevice && Triple.isNVPTX())) {
     if (Args.hasFlag(options::OPT_fcuda_short_ptr,
                      options::OPT_fno_cuda_short_ptr, false))
       CmdArgs.push_back("-fcuda-short-ptr");
diff --git a/clang/test/CodeGenSYCL/nvptx-short-ptr.cpp b/clang/test/CodeGenSYCL/nvptx-short-ptr.cpp
@@ -0,0 +1,27 @@
+// Check that we see the expected data layouts for NVPTX when we pass the
+// -nvptx-short-ptr option.
+
+// RUN: %clang_cc1 -fsycl-is-device -disable-llvm-passes \
+// RUN:  -triple nvptx-nvidia-cuda -emit-llvm %s -o - \
+// RUN:    | FileCheck %s --check-prefix CHECK32
+
+// RUN: %clang_cc1 -fsycl-is-device -disable-llvm-passes \
+// RUN:  -triple nvptx-nvidia-cuda -emit-llvm -fcuda-short-ptr -mllvm -nvptx-short-ptr %s -o - \
+// RUN:    | FileCheck %s --check-prefix CHECK32
+
+// RUN: %clang_cc1 -fsycl-is-device -disable-llvm-passes \
+// RUN:  -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \
+// RUN:    | FileCheck %s --check-prefix CHECK64-DEFAULT
+
+// RUN: %clang_cc1 -fsycl-is-device -disable-llvm-passes \
+// RUN:  -triple nvptx64-nvidia-cuda -emit-llvm -fcuda-short-ptr -mllvm -nvptx-short-ptr %s -o - \
+// RUN:    | FileCheck %s --check-prefix CHECK64-SHORT
+
+// Targeting a 32-bit NVPTX, check that we see universal 32-bit pointers (the
+// option changes nothing)
+// CHECK32: target datalayout = "e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+
+// Targeting a 64-bit NVPTX, check that we see 32-bit pointers for shared (3),
+// const (4), and local (5) address spaces only, and only with the option.
+// CHECK64-DEFAULT: target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+// CHECK64-SHORT: target datalayout = "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"
diff --git a/clang/test/Driver/sycl-nvptx-short-ptr.cpp b/clang/test/Driver/sycl-nvptx-short-ptr.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang -### -nocudalib \
+// RUN:   -fsycl -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-DEFAULT %s
+
+// RUN: %clang -### -nocudalib \
+// RUN:   -fsycl -fsycl-targets=nvptx64-nvidia-cuda -fcuda-short-ptr %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-SHORT %s
+
+
+// CHECK-SHORT: "-mllvm" "--nvptx-short-ptr"
+// CHECK-SHORT: "-fcuda-short-ptr"
+
+// CHECK-DEFAULT-NOT: "--nvptx-short-ptr"
+// CHECK-DEFAULT-NOT: "-fcuda-short-ptr"

Original file line number	Diff line number	Diff line change
`@@ -8593,7 +8593,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,`
`8593`	`8593`	`}`
`8594`	`8594`	`}`
`8595`	`8595`
`8596`		`- if (IsCuda) {`
	`8596`	`+ // Propagate -fcuda-short-ptr if compiling CUDA or SYCL for NVPTX`
	`8597`	`+ if (IsCuda \|\| (IsSYCLDevice && Triple.isNVPTX())) {`
`8597`	`8598`	`if (Args.hasFlag(options::OPT_fcuda_short_ptr,`
`8598`	`8599`	`options::OPT_fno_cuda_short_ptr, false))`
`8599`	`8600`	`CmdArgs.push_back("-fcuda-short-ptr");`