-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[NVPTX] Remove nvvm.bitcast.* intrinsics #107936
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Remove nvvm.bitcast.* intrinsics #107936
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) ChangesRemove the following intrinsics which correspond directly to a bitcast:
Full diff: https://github.com/llvm/llvm-project/pull/107936.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 20f038a0a9bbde..6fff562165080a 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -599,14 +599,6 @@ TARGET_BUILTIN(__nvvm_e4m3x2_to_f16x2_rn_relu, "V2hs", "", AND(SM_89,PTX81))
TARGET_BUILTIN(__nvvm_e5m2x2_to_f16x2_rn, "V2hs", "", AND(SM_89,PTX81))
TARGET_BUILTIN(__nvvm_e5m2x2_to_f16x2_rn_relu, "V2hs", "", AND(SM_89,PTX81))
-// Bitcast
-
-BUILTIN(__nvvm_bitcast_f2i, "if", "")
-BUILTIN(__nvvm_bitcast_i2f, "fi", "")
-
-BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
-BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
-
// FNS
TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 39685c920d948d..737dd6092e2183 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -30,6 +30,10 @@
// * llvm.nvvm.max.ui --> select(x ule y, x, y)
// * llvm.nvvm.max.ull --> ibid.
// * llvm.nvvm.h2f --> llvm.convert.to.fp16.f32
+// * llvm.nvvm.bitcast.f2i --> bitcast
+// * llvm.nvvm.bitcast.i2f --> ibid.
+// * llvm.nvvm.bitcast.d2ll --> ibid.
+// * llvm.nvvm.bitcast.ll2d --> ibid.
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
@@ -1339,20 +1343,6 @@ let TargetPrefix = "nvvm" in {
def int_nvvm_e5m2x2_to_f16x2_rn_relu : ClangBuiltin<"__nvvm_e5m2x2_to_f16x2_rn_relu">,
Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
-//
-// Bitcast
-//
-
- def int_nvvm_bitcast_f2i : ClangBuiltin<"__nvvm_bitcast_f2i">,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_bitcast_i2f : ClangBuiltin<"__nvvm_bitcast_i2f">,
- DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
-
- def int_nvvm_bitcast_ll2d : ClangBuiltin<"__nvvm_bitcast_ll2d">,
- DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
- def int_nvvm_bitcast_d2ll : ClangBuiltin<"__nvvm_bitcast_d2ll">,
- DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
-
// FNS
def int_nvvm_fns : ClangBuiltin<"__nvvm_fns">,
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 69dae5e32dbbe8..02d1d9d9f78984 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1268,6 +1268,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
else if (Name.consume_front("atomic.load.add."))
// nvvm.atomic.load.add.{f32.p,f64.p}
Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
+ else if (Name.consume_front("bitcast."))
+ // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
+ Expand =
+ Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
else
Expand = false;
@@ -4258,6 +4262,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
F->getParent(), Intrinsic::convert_from_fp16,
{Builder.getFloatTy()}),
CI->getArgOperand(0), "h2f");
+ } else if (Name.consume_front("bitcast.") &&
+ (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
+ Name == "d2ll")) {
+ Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
} else {
Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
if (IID != Intrinsic::not_intrinsic &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 0c883093dd0a54..5c2ef4fa417ac1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1561,20 +1561,6 @@ def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn Int16Regs:$a),
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu Int16Regs:$a),
(CVT_f16x2_e5m2x2 Int16Regs:$a, CvtRN_RELU)>;
-//
-// Bitcast
-//
-
-def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
- Float32Regs, int_nvvm_bitcast_f2i>;
-def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
- Int32Regs, int_nvvm_bitcast_i2f>;
-
-def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
- Int64Regs, int_nvvm_bitcast_ll2d>;
-def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
- Float64Regs, int_nvvm_bitcast_d2ll>;
-
//
// FNS
//
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 1c11e1221fef34..7e4a4d527fc903 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -26,6 +26,11 @@ declare i16 @llvm.nvvm.min.us(i16, i16)
declare i32 @llvm.nvvm.min.ui(i32, i32)
declare i64 @llvm.nvvm.min.ull(i64, i64)
+declare i32 @llvm.nvvm.bitcast.f2i(float)
+declare float @llvm.nvvm.bitcast.i2f(i32)
+declare i64 @llvm.nvvm.bitcast.d2ll(double)
+declare double @llvm.nvvm.bitcast.ll2d(i64)
+
; CHECK-LABEL: @simple_upgrade
define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
@@ -120,3 +125,18 @@ define void @min_max(i16 %a1, i16 %a2, i32 %b1, i32 %b2, i64 %c1, i64 %c2) {
ret void
}
+
+; CHECK-LABEL: @bitcast
+define void @bitcast(i32 %a, i64 %b, float %c, double %d) {
+; CHECK: bitcast float %c to i32
+; CHECK: bitcast i32 %a to float
+; CHECK: bitcast double %d to i64
+; CHECK: bitcast i64 %b to double
+;
+ %r1 = call i32 @llvm.nvvm.bitcast.f2i(float %c)
+ %r2 = call float @llvm.nvvm.bitcast.i2f(i32 %a)
+ %r3 = call i64 @llvm.nvvm.bitcast.d2ll(double %d)
+ %r4 = call double @llvm.nvvm.bitcast.ll2d(i64 %b)
+
+ ret void
+}
\ No newline at end of file
|
It may be worth adding a note about this in the release notes. |
I'm not familiar with these, can you point me to an analogous change I could use as an example? |
[LLD] Add CLASS syntax to SECTIONS |
611eff7
to
38acf35
Compare
@@ -599,14 +599,6 @@ TARGET_BUILTIN(__nvvm_e4m3x2_to_f16x2_rn_relu, "V2hs", "", AND(SM_89,PTX81)) | |||
TARGET_BUILTIN(__nvvm_e5m2x2_to_f16x2_rn, "V2hs", "", AND(SM_89,PTX81)) | |||
TARGET_BUILTIN(__nvvm_e5m2x2_to_f16x2_rn_relu, "V2hs", "", AND(SM_89,PTX81)) | |||
|
|||
// Bitcast |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Thank you, past me, for the commit message.) The key in the commit message from the previous commit is
These builtins don't need to be backfilled in clang: They're not accessible to user code from nvcc.
If they were accessible from nvcc, we could have compatibility issues where user code (or, just as likely, CUDA headers) expects the builtin to be there. In which case we might want to keep the builtin (in the same way that we auto-upgrade the llvm intrinsic).
I tried to check using cuda.godbolt.org whether these builtins exist in nvcc, but at this very moment it seems to be down, it's not compiling anything.
Assuming that something like the following does not compile in nvcc, I think this is fine.
__global__ void foo() {
__nvvm_bitcast_f2i(0.0);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! I've just confirmed these do not work in nvcc.
Remove the following intrinsics which correspond directly to a bitcast: