diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 3a566bbac3623..8b0b05c0ea424 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -127,69 +127,6 @@ Example: 64-bit PTX for CUDA Driver API: ``nvptx64-nvidia-cuda`` NVPTX Intrinsics ================ -Address Space Conversion ------------------------- - -'``llvm.nvvm.ptr.*.to.gen``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -These are overloaded intrinsics. You can use these on any pointer types. - -.. code-block:: llvm - - declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) - declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) - declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) - declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) - -Overview: -""""""""" - -The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic -address space to a generic address space pointer. - -Semantics: -"""""""""" - -These intrinsics modify the pointer value to be a valid generic address space -pointer. - - -'``llvm.nvvm.ptr.gen.to.*``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Syntax: -""""""" - -These are overloaded intrinsics. You can use these on any pointer types. - -.. code-block:: llvm - - declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) - declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) - declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) - declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) - -Overview: -""""""""" - -The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic -address space to a pointer in the target address space. Note that these -intrinsics are only useful if the address space of the target address space of -the pointer is known. It is not legal to use address space conversion -intrinsics to convert a pointer from one non-generic address space to another -non-generic address space. - -Semantics: -"""""""""" - -These intrinsics modify the pointer value to be a valid pointer in the target -non-generic address space. - - Reading PTX Special Registers ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index aa5294f5f9c90..7b8ffe417fccd 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -30,10 +30,18 @@ // * llvm.nvvm.max.ui --> select(x ule y, x, y) // * llvm.nvvm.max.ull --> ibid. // * llvm.nvvm.h2f --> llvm.convert.to.fp16.f32 -// * llvm.nvvm.bitcast.f2i --> bitcast -// * llvm.nvvm.bitcast.i2f --> ibid. -// * llvm.nvvm.bitcast.d2ll --> ibid. -// * llvm.nvvm.bitcast.ll2d --> ibid. +// * llvm.nvvm.bitcast.f2i --> bitcast +// * llvm.nvvm.bitcast.i2f --> ibid. +// * llvm.nvvm.bitcast.d2ll --> ibid. +// * llvm.nvvm.bitcast.ll2d --> ibid. +// * llvm.nvvm.ptr.gen.to.global --> addrspacecast +// * llvm.nvvm.ptr.gen.to.shared --> ibid. +// * llvm.nvvm.ptr.gen.to.constant --> ibid. +// * llvm.nvvm.ptr.gen.to.local --> ibid. +// * llvm.nvvm.ptr.global.to.gen --> ibid. +// * llvm.nvvm.ptr.shared.to.gen --> ibid. +// * llvm.nvvm.ptr.constant.to.gen --> ibid. +// * llvm.nvvm.ptr.local.to.gen --> ibid. def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr @@ -1602,40 +1610,6 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], "llvm.nvvm.ldg.global.p">; -// Use for generic pointers -// - These intrinsics are used to convert address spaces. -// - The input pointer and output pointer must have the same type, except for -// the address-space. (This restriction is not enforced here as there is -// currently no way to describe it). -// - This complements the llvm bitcast, which can be used to cast one type -// of pointer to another type of pointer, while the address space remains -// the same. -def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.local.to.gen">; -def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.shared.to.gen">; -def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.global.to.gen">; -def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.constant.to.gen">; - -def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.global">; -def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.shared">; -def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.local">; -def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], - "llvm.nvvm.ptr.gen.to.constant">; - // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3390d651d6c69..b84258398c193 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -1275,6 +1275,16 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, else if (Name.consume_front("rotate.")) // nvvm.rotate.{b32,b64,right.b64} Expand = Name == "b32" || Name == "b64" || Name == "right.b64"; + else if (Name.consume_front("ptr.gen.to.")) + // nvvm.ptr.gen.to.{local,shared,global,constant} + Expand = Name.starts_with("local") || Name.starts_with("shared") || + Name.starts_with("global") || Name.starts_with("constant"); + else if (Name.consume_front("ptr.")) + // nvvm.ptr.{local,shared,global,constant}.to.gen + Expand = + (Name.consume_front("local") || Name.consume_front("shared") || + Name.consume_front("global") || Name.consume_front("constant")) && + Name.starts_with(".to.gen"); else Expand = false; @@ -2338,6 +2348,15 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty); Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr, {Arg, Arg, ZExtShiftAmt}); + } else if ((Name.consume_front("ptr.gen.to.") && + (Name.starts_with("local") || Name.starts_with("shared") || + Name.starts_with("global") || Name.starts_with("constant"))) || + (Name.consume_front("ptr.") && + (Name.consume_front("local") || Name.consume_front("shared") || + Name.consume_front("global") || + Name.consume_front("constant")) && + Name.starts_with(".to.gen"))) { + Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType()); } else { Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); if (IID != Intrinsic::not_intrinsic && diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 56c96ea943b89..7f942de74bdcc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1109,11 +1109,21 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { AddrSpaceCastSDNode *CastN = cast(N); unsigned SrcAddrSpace = CastN->getSrcAddressSpace(); unsigned DstAddrSpace = CastN->getDestAddressSpace(); + SDLoc DL(N); assert(SrcAddrSpace != DstAddrSpace && "addrspacecast must be between different address spaces"); if (DstAddrSpace == ADDRESS_SPACE_GENERIC) { // Specific to generic + + if (TM.is64Bit() && TM.getPointerSizeInBits(SrcAddrSpace) == 32) { + SDValue CvtNone = + CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); + SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u64_u32, DL, MVT::i64, + Src, CvtNone); + Src = SDValue(Cvt, 0); + } + unsigned Opc; switch (SrcAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); @@ -1121,26 +1131,16 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_global_64 : NVPTX::cvta_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 - ? NVPTX::cvta_shared_6432 - : NVPTX::cvta_shared_64) - : NVPTX::cvta_shared; + Opc = TM.is64Bit() ? NVPTX::cvta_shared_64 : NVPTX::cvta_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 - ? NVPTX::cvta_const_6432 - : NVPTX::cvta_const_64) - : NVPTX::cvta_const; + Opc = TM.is64Bit() ? NVPTX::cvta_const_64 : NVPTX::cvta_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(SrcAddrSpace) == 32 - ? NVPTX::cvta_local_6432 - : NVPTX::cvta_local_64) - : NVPTX::cvta_local; + Opc = TM.is64Bit() ? NVPTX::cvta_local_64 : NVPTX::cvta_local; break; } - ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), - Src)); + ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src)); return; } else { // Generic to specific @@ -1153,30 +1153,28 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_global_64 : NVPTX::cvta_to_global; break; case ADDRESS_SPACE_SHARED: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 - ? NVPTX::cvta_to_shared_3264 - : NVPTX::cvta_to_shared_64) - : NVPTX::cvta_to_shared; + Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_64 : NVPTX::cvta_to_shared; break; case ADDRESS_SPACE_CONST: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 - ? NVPTX::cvta_to_const_3264 - : NVPTX::cvta_to_const_64) - : NVPTX::cvta_to_const; + Opc = TM.is64Bit() ? NVPTX::cvta_to_const_64 : NVPTX::cvta_to_const; break; case ADDRESS_SPACE_LOCAL: - Opc = TM.is64Bit() ? (TM.getPointerSizeInBits(DstAddrSpace) == 32 - ? NVPTX::cvta_to_local_3264 - : NVPTX::cvta_to_local_64) - : NVPTX::cvta_to_local; + Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local; break; case ADDRESS_SPACE_PARAM: - Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64 - : NVPTX::nvvm_ptr_gen_to_param; + Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr; break; } - ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), - Src)); + + SDNode *CVTA = CurDAG->getMachineNode(Opc, DL, N->getValueType(0), Src); + if (TM.is64Bit() && TM.getPointerSizeInBits(DstAddrSpace) == 32) { + SDValue CvtNone = + CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); + CVTA = CurDAG->getMachineNode(NVPTX::CVT_u32_u64, DL, MVT::i32, + SDValue(CVTA, 0), CvtNone); + } + + ReplaceNode(N, CVTA); return; } } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 0d9dd1b8ee70a..b82826089d3fe 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -174,10 +174,6 @@ def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" "&& Subtarget->getPTXVersion() >= 64)">; -def useShortPtrLocal : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_LOCAL) == 32">; -def useShortPtrShared : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32">; -def useShortPtrConst : Predicate<"TM.is64Bit() && TM.getPointerSizeInBits(ADDRESS_SPACE_CONST) == 32">; - def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">; diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 176d28c991207..f5ac3c4e96436 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -2543,59 +2543,45 @@ defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; -multiclass NG_TO_G { +multiclass NG_TO_G { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - !strconcat("cvta.", Str, ".u32 \t$result, $src;"), - [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + "cvta." # Str # ".u32 \t$result, $src;", []>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - !strconcat("cvta.", Str, ".u64 \t$result, $src;"), - [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; - def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src), - "{{ .reg .b64 %tmp;\n\t" - #" cvt.u64.u32 \t%tmp, $src;\n\t" - #" cvta." # Str # ".u64 \t$result, %tmp; }}", - [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, - Requires<[ShortPtr]>; + "cvta." # Str # ".u64 \t$result, $src;", []>; } -multiclass G_TO_NG { +multiclass G_TO_NG { def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"), - [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + "cvta.to." # Str # ".u32 \t$result, $src;", []>; def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"), - [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; - def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src), - "{{ .reg .b64 %tmp;\n\t" - #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t" - #" cvt.u32.u64 \t$result, %tmp; }}", - [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, - Requires<[ShortPtr]>; -} - -defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>; -defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>; -defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>; -defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>; -defm cvta_param : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>; - -defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>; -defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>; -defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>; -defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>; + "cvta.to." # Str # ".u64 \t$result, $src;", []>; +} + +defm cvta_local : NG_TO_G<"local">; +defm cvta_shared : NG_TO_G<"shared">; +defm cvta_global : NG_TO_G<"global">; +defm cvta_const : NG_TO_G<"const">; + +defm cvta_to_local : G_TO_NG<"local">; +defm cvta_to_shared : G_TO_NG<"shared">; +defm cvta_to_global : G_TO_NG<"global">; +defm cvta_to_const : G_TO_NG<"const">; + +// nvvm.ptr.param.to.gen +defm cvta_param : NG_TO_G<"param">; + +def : Pat<(int_nvvm_ptr_param_to_gen Int32Regs:$src), + (cvta_param Int32Regs:$src)>; + +def : Pat<(int_nvvm_ptr_param_to_gen Int64Regs:$src), + (cvta_param_64 Int64Regs:$src)>; // nvvm.ptr.gen.to.param -def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), - (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, - (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; -def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), - (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, - (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; +def : Pat<(int_nvvm_ptr_gen_to_param Int32Regs:$src), + (IMOV32rr Int32Regs:$src)>; +def : Pat<(int_nvvm_ptr_gen_to_param Int64Regs:$src), + (IMOV64rr Int64Regs:$src)>; // nvvm.move intrinsicc def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), @@ -2638,24 +2624,6 @@ def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), [(set Int64Regs:$r, (int_nvvm_move_ptr texternalsym:$s))]>;*/ - -// MoveParam %r1, param -// ptr_local_to_gen %r2, %r1 -// ptr_gen_to_local %r3, %r2 -// -> -// mov %r1, param - -// @TODO: Revisit this. There is a type -// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym -// instructions are not currently defined. However, we can use the ptr -// variants and the asm printer will do the right thing. -def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen - (MoveParam texternalsym:$src)))), - (nvvm_move_ptr64 texternalsym:$src)>; -def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen - (MoveParam texternalsym:$src)))), - (nvvm_move_ptr32 texternalsym:$src)>; - def texsurf_handles : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), "mov.u64 \t$result, $src;", []>; diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 43ac246055da7..584c0ef7cfeb7 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -35,6 +35,15 @@ declare i32 @llvm.nvvm.rotate.b32(i32, i32) declare i64 @llvm.nvvm.rotate.right.b64(i64, i32) declare i64 @llvm.nvvm.rotate.b64(i64, i32) +declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) +declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr) +declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr) +declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr) +declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1)) +declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) +declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) +declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) + ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -156,3 +165,29 @@ define void @rotate(i32 %a, i64 %b) { %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8) ret void } + +; CHECK-LABEL: @addrspacecast +define void @addrspacecast(ptr %p0) { +; CHECK: %1 = addrspacecast ptr %p0 to ptr addrspace(1) +; CHECK: %2 = addrspacecast ptr addrspace(1) %1 to ptr +; CHECK: %3 = addrspacecast ptr %2 to ptr addrspace(3) +; CHECK: %4 = addrspacecast ptr addrspace(3) %3 to ptr +; CHECK: %5 = addrspacecast ptr %4 to ptr addrspace(4) +; CHECK: %6 = addrspacecast ptr addrspace(4) %5 to ptr +; CHECK: %7 = addrspacecast ptr %6 to ptr addrspace(5) +; CHECK: %8 = addrspacecast ptr addrspace(5) %7 to ptr +; + %p1 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p0) + %p2 = call ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1) %p1) + + %p3 = call ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr %p2) + %p4 = call ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3) %p3) + + %p5 = call ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr %p4) + %p6 = call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) %p5) + + %p7 = call ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr %p6) + %p8 = call ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5) %p7) + + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll deleted file mode 100644 index 040bbde13800c..0000000000000 --- a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt < %s -O3 -S | FileCheck %s - -; Address space intrinsics were erroneously marked NoCapture, leading to bad -; optimizations (such as the store below being eliminated as dead code). This -; test makes sure we don't regress. - -declare void @foo(ptr addrspace(1)) - -declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr) - -; CHECK: @bar -define void @bar() { - %t1 = alloca i32 -; CHECK: call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr nonnull %t1) -; CHECK-NEXT: store i32 10, ptr %t1 - %t2 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %t1) - store i32 10, ptr %t1 - call void @foo(ptr addrspace(1) %t2) - ret void -} - diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 643ed6484ae9f..a255717926d6b 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -25,6 +25,10 @@ ; CHECK-DAG: .reg .b64 %rd<8>; ; CHECK: .loc [[DEBUG_INFO_CU:[0-9]+]] 5 0 ; CHECK: ld.param.u32 %r{{.+}}, [{{.+}}]; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; +; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; +; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[BUILTUIN_VARS_H:[0-9]+]] 78 180 ; CHECK: mov.u32 %r{{.+}}, %ctaid.x; ; CHECK: .loc [[BUILTUIN_VARS_H]] 89 180 @@ -38,10 +42,6 @@ ; CHECK: .loc [[DEBUG_INFO_CU]] 7 7 ; CHECK: @%p{{.+}} bra [[BB:\$L__.+]]; ; CHECK: ld.param.f32 %f{{.+}}, [{{.+}}]; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; -; CHECK: ld.param.u64 %rd{{.+}}, [{{.+}}]; -; CHECK: cvta.to.global.u64 %rd{{.+}}, %rd{{.+}}; ; CHECK: .loc [[DEBUG_INFO_CU]] 8 13 ; CHECK: mul.wide.u32 %rd{{.+}}, %r{{.+}}, 4; ; CHECK: add.s64 %rd{{.+}}, %rd{{.+}}, %rd{{.+}}; @@ -2665,22 +2665,22 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b32 4586 // DW_AT_type ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp0 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 11 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8c2:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 1466 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp1 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 24 // DW_AT_call_column ; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8da:0x18 DW_TAG_inlined_subroutine ; CHECK-NEXT:.b32 2060 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 $L__tmp2 // DW_AT_low_pc -; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_high_pc +; CHECK-NEXT:.b64 $L__tmp3 // DW_AT_low_pc +; CHECK-NEXT:.b64 $L__tmp4 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_call_file ; CHECK-NEXT:.b8 6 // DW_AT_call_line ; CHECK-NEXT:.b8 37 // DW_AT_call_column