@@ -53,15 +53,15 @@ function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
5353 ret float %add
5454 }
5555
56- define void @my_kernel(float* %ptr) {
57- %val = load float, float* %ptr
56+ define void @my_kernel(ptr %ptr) {
57+ %val = load float, ptr %ptr
5858 %ret = call float @my_fmad(float %val, float %val, float %val)
59- store float %ret, float* %ptr
59+ store float %ret, ptr %ptr
6060 ret void
6161 }
6262
6363 !nvvm.annotations = !{!1}
64- !1 = !{void (float*)* @my_kernel, !"kernel", i32 1}
64+ !1 = !{ptr @my_kernel, !"kernel", i32 1}
6565
6666 When compiled, the PTX kernel functions are callable by host-side code.
6767
@@ -140,10 +140,10 @@ These are overloaded intrinsics. You can use these on any pointer types.
140140
141141.. code-block:: llvm
142142
143- declare i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)* )
144- declare i8* @llvm.nvvm.ptr.shared.to.gen.p0i8.p3i8(i8 addrspace(3)* )
145- declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* )
146- declare i8* @llvm.nvvm.ptr.local.to.gen.p0i8.p5i8(i8 addrspace(5)* )
143+ declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1))
144+ declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3))
145+ declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))
146+ declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5))
147147
148148 Overview:
149149"""""""""
@@ -168,10 +168,10 @@ These are overloaded intrinsics. You can use these on any pointer types.
168168
169169.. code-block:: llvm
170170
171- declare i8 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8* )
172- declare i8 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i8.p0i8(i8* )
173- declare i8 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i8.p0i8(i8* )
174- declare i8 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i8.p0i8(i8* )
171+ declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr )
172+ declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr )
173+ declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr )
174+ declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr )
175175
176176 Overview:
177177"""""""""
@@ -436,35 +436,33 @@ The Kernel
436436 ; Intrinsic to read X component of thread ID
437437 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
438438
439- define void @kernel(float addrspace(1)* %A,
440- float addrspace(1)* %B,
441- float addrspace(1)* %C) {
439+ define void @kernel(ptr addrspace(1) %A,
440+ ptr addrspace(1) %B,
441+ ptr addrspace(1) %C) {
442442 entry:
443443 ; What is my ID?
444444 %id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
445445
446446 ; Compute pointers into A, B, and C
447- %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
448- %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
449- %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
447+ %ptrA = getelementptr float, ptr addrspace(1) %A, i32 %id
448+ %ptrB = getelementptr float, ptr addrspace(1) %B, i32 %id
449+ %ptrC = getelementptr float, ptr addrspace(1) %C, i32 %id
450450
451451 ; Read A, B
452- %valA = load float, float addrspace(1)* %ptrA, align 4
453- %valB = load float, float addrspace(1)* %ptrB, align 4
452+ %valA = load float, ptr addrspace(1) %ptrA, align 4
453+ %valB = load float, ptr addrspace(1) %ptrB, align 4
454454
455455 ; Compute C = A + B
456456 %valC = fadd float %valA, %valB
457457
458458 ; Store back to C
459- store float %valC, float addrspace(1)* %ptrC, align 4
459+ store float %valC, ptr addrspace(1) %ptrC, align 4
460460
461461 ret void
462462 }
463463
464464 !nvvm.annotations = !{!0}
465- !0 = !{void (float addrspace(1)*,
466- float addrspace(1)*,
467- float addrspace(1)*)* @kernel, !"kernel", i32 1}
465+ !0 = !{ptr @kernel, !"kernel", i32 1}
468466
469467
470468 We can use the LLVM ``llc`` tool to directly run the NVPTX code generator:
@@ -613,9 +611,7 @@ For the previous example, we have:
613611.. code-block:: llvm
614612
615613 !nvvm.annotations = !{!0}
616- !0 = !{void (float addrspace(1)*,
617- float addrspace(1)*,
618- float addrspace(1)*)* @kernel, !"kernel", i32 1}
614+ !0 = !{ptr @kernel, !"kernel", i32 1}
619615
620616 Here, we have a single metadata declaration in ``nvvm.annotations``. This
621617metadata annotates our ``@kernel`` function with the ``kernel`` attribute.
@@ -820,35 +816,33 @@ Libdevice provides an ``__nv_powf`` function that we will use.
820816 ; libdevice function
821817 declare float @__nv_powf(float, float)
822818
823- define void @kernel(float addrspace(1)* %A,
824- float addrspace(1)* %B,
825- float addrspace(1)* %C) {
819+ define void @kernel(ptr addrspace(1) %A,
820+ ptr addrspace(1) %B,
821+ ptr addrspace(1) %C) {
826822 entry:
827823 ; What is my ID?
828824 %id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
829825
830826 ; Compute pointers into A, B, and C
831- %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
832- %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
833- %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
827+ %ptrA = getelementptr float, ptr addrspace(1) %A, i32 %id
828+ %ptrB = getelementptr float, ptr addrspace(1) %B, i32 %id
829+ %ptrC = getelementptr float, ptr addrspace(1) %C, i32 %id
834830
835831 ; Read A, B
836- %valA = load float, float addrspace(1)* %ptrA, align 4
837- %valB = load float, float addrspace(1)* %ptrB, align 4
832+ %valA = load float, ptr addrspace(1) %ptrA, align 4
833+ %valB = load float, ptr addrspace(1) %ptrB, align 4
838834
839835 ; Compute C = pow(A, B)
840836 %valC = call float @__nv_powf(float %valA, float %valB)
841837
842838 ; Store back to C
843- store float %valC, float addrspace(1)* %ptrC, align 4
839+ store float %valC, ptr addrspace(1) %ptrC, align 4
844840
845841 ret void
846842 }
847843
848844 !nvvm.annotations = !{!0}
849- !0 = !{void (float addrspace(1)*,
850- float addrspace(1)*,
851- float addrspace(1)*)* @kernel, !"kernel", i32 1}
845+ !0 = !{ptr @kernel, !"kernel", i32 1}
852846
853847
854848 To compile this kernel, we perform the following steps:
0 commit comments